431 files changed, 32899 insertions, 18184 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 76e362277179..a5d3503b353c 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -3,11 +3,11 @@ obj-y				+= kernel/
 obj-y				+= mm/
 obj-$(CONFIG_KVM)		+= kvm/
 obj-y				+= crypto/
-obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
+obj-$(CONFIG_S390_HYPFS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
-obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
+obj-$(CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY) += purgatory/
 
 # for cleaning
 subdir- += boot tools
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 318fce77601d..0c16dc443e2f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -17,9 +17,12 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
 	def_bool n
 
-config GENERIC_HWEIGHT
+config ARCH_PROC_KCORE_TEXT
 	def_bool y
 
+config GENERIC_HWEIGHT
+	def_bool !HAVE_MARCH_Z196_FEATURES
+
 config GENERIC_BUG
 	def_bool y if BUG
 
@@ -38,9 +41,6 @@ config AUDIT_ARCH
 config NO_IOPORT_MAP
 	def_bool y
 
-config PCI_QUIRKS
-	def_bool n
-
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
@@ -49,6 +49,19 @@ config KASAN_SHADOW_OFFSET
 	depends on KASAN
 	default 0x1C000000000000
 
+config CC_ASM_FLAG_OUTPUT_BROKEN
+	def_bool CC_IS_GCC && GCC_VERSION < 140200
+	help
+	  GCC versions before 14.2.0 may die with an internal
+	  compiler error in some configurations if flag output
+	  operands are used within inline assemblies.
+
+config CC_HAS_ASM_AOR_FORMAT_FLAGS
+	def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100)
+	help
+	  Clang versions before 19.1.0 do not support A,
+	  O, and R inline assembly format flags.
+
 config S390
 	def_bool y
 	#
@@ -57,30 +70,38 @@ config S390
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
 	select ALTERNATE_USER_ADDRESS_SPACE
 	select ARCH_32BIT_USTAT_F_TINODE
-	select ARCH_BINFMT_ELF_STATE
 	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+	select ARCH_HAS_CPU_FINALIZE_INIT
+	select ARCH_HAS_CRC32
 	select ARCH_HAS_CURRENT_STACK_POINTER
+	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_OPS if PCI
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_KCOV
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_MEM_ENCRYPT
+	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+	select ARCH_HAS_PREEMPT_LAZY
+	select ARCH_HAS_PTDUMP
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SCALED_CPUTIME
+	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYSCALL_WRAPPER
-	select ARCH_HAS_UBSAN_SANITIZE_ALL
-	select ARCH_HAS_VDSO_DATA
+	select ARCH_HAS_UBSAN
+	select ARCH_HAS_VDSO_TIME_DATA
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
 	select ARCH_INLINE_READ_LOCK_BH
@@ -110,61 +131,78 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	select ARCH_SUPPORTS_HUGETLBFS
+	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
+	select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_PER_VMA_LOCK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
-	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_USE_SYM_ANNOTATIONS
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+	select ARCH_WANT_KERNEL_PMD_MKWRITE
+	select ARCH_WANT_LD_ORPHAN_WARN
+	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
 	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
-	select DMA_OPS if PCI
+	select DCACHE_WORD_ACCESS if !KMSAN
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
-	select GCC12_NO_ARRAY_BOUNDS
+	select FUNCTION_ALIGNMENT_8B if CC_IS_GCC
+	select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC
 	select GENERIC_ALLOCATOR
+	select GENERIC_CPU_DEVICES
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_ENTRY
 	select GENERIC_GETTIMEOFDAY
-	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
+	select GENERIC_VDSO_DATA_STORE
 	select GENERIC_VDSO_TIME_NS
-	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+	select GENERIC_IOREMAP if PCI
+	select HAVE_ALIGNED_STRUCT_PAGE
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_VMALLOC
 	select HAVE_ARCH_KCSAN
+	select HAVE_ARCH_KMSAN
 	select HAVE_ARCH_KFENCE
 	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
+	select HAVE_ARCH_STACKLEAK
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_VMAP_STACK
 	select HAVE_ASM_MODVERSIONS
+	select HAVE_BUILDTIME_MCOUNT_SORT
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+	select HAVE_FTRACE_REGS_HAVING_PT_REGS
 	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
-	select HAVE_FAST_GUP
+	select HAVE_GUP_FAST
 	select HAVE_FENTRY
+	select HAVE_FTRACE_GRAPH_FUNC
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUNCTION_ERROR_INJECTION
+	select HAVE_FUNCTION_GRAPH_FREGS
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
@@ -181,45 +219,57 @@ config S390
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
-	select HAVE_KVM
 	select HAVE_LIVEPATCH
 	select HAVE_MEMBLOCK_PHYS_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_NOP_MCOUNT
+	select HAVE_PAGE_SIZE_4KB
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_PREEMPT_DYNAMIC_KEY
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE
+	select HAVE_RETHOOK
 	select HAVE_RSEQ
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
+	select HAVE_SETUP_PER_CPU_AREA
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HAVE_VIRT_CPU_ACCOUNTING_IDLE
+	select HOTPLUG_SMT
 	select IOMMU_HELPER		if PCI
 	select IOMMU_SUPPORT		if PCI
+	select KASAN_VMALLOC if KASAN
+	select LOCK_MM_AND_FIND_VMA
+	select MMU_GATHER_MERGE_VMAS
 	select MMU_GATHER_NO_GATHER
 	select MMU_GATHER_RCU_TABLE_FREE
-	select MMU_GATHER_MERGE_VMAS
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE	if PCI
+	select NEED_PER_CPU_EMBED_FIRST_CHUNK
+	select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE
 	select NEED_SG_DMA_LENGTH	if PCI
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
 	select PCI_DOMAINS		if PCI
 	select PCI_MSI			if PCI
 	select PCI_MSI_ARCH_FALLBACKS	if PCI_MSI
+	select PCI_QUIRKS		if PCI
 	select SPARSE_IRQ
 	select SWIOTLB
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
 	select TTY
+	select USER_STACKTRACE_SUPPORT
+	select VDSO_GETRANDOM
 	select VIRT_CPU_ACCOUNTING
+	select VMAP_STACK
 	select ZONE_DMA
 	# Note: keep the above list sorted alphabetically
 
@@ -232,6 +282,28 @@ config PGTABLE_LEVELS
 
 source "kernel/livepatch/Kconfig"
 
+config ARCH_SUPPORTS_KEXEC
+	def_bool y
+
+config ARCH_SUPPORTS_KEXEC_FILE
+	def_bool y
+
+config ARCH_SUPPORTS_KEXEC_SIG
+	def_bool MODULE_SIG_FORMAT
+
+config ARCH_SUPPORTS_KEXEC_PURGATORY
+	def_bool y
+
+config ARCH_SUPPORTS_CRASH_DUMP
+	def_bool y
+	help
+	  Indicates that s390 supports kernel crash dumps.
+	  This option also enables s390 zfcpdump.
+	  See <file:Documentation/arch/s390/zfcpdump.rst> for more details.
+
+config ARCH_DEFAULT_CRASH_DUMP
+	def_bool y
+
 menu "Processor type and features"
 
 config HAVE_MARCH_Z10_FEATURES
@@ -261,6 +333,10 @@ config HAVE_MARCH_Z16_FEATURES
 	def_bool n
 	select HAVE_MARCH_Z15_FEATURES
 
+config HAVE_MARCH_Z17_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z16_FEATURES
+
 choice
 	prompt "Processor type"
 	default MARCH_Z196
@@ -326,6 +402,14 @@ config MARCH_Z16
 	  Select this to enable optimizations for IBM z16 (3931 and
 	  3932 series).
 
+config MARCH_Z17
+	bool "IBM z17"
+	select HAVE_MARCH_Z17_FEATURES
+	depends on $(cc-option,-march=z17)
+	help
+	  Select this to enable optimizations for IBM z17 (9175 and
+	  9176 series).
+
 endchoice
 
 config MARCH_Z10_TUNE
@@ -349,6 +433,9 @@ config MARCH_Z15_TUNE
 config MARCH_Z16_TUNE
 	def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT
 
+config MARCH_Z17_TUNE
+	def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT
+
 choice
 	prompt "Tune code generation"
 	default TUNE_DEFAULT
@@ -393,6 +480,10 @@ config TUNE_Z16
 	bool "IBM z16"
 	depends on $(cc-option,-mtune=z16)
 
+config TUNE_Z17
+	bool "IBM z17"
+	depends on $(cc-option,-mtune=z17)
+
 endchoice
 
 config 64BIT
@@ -407,18 +498,20 @@ config COMMAND_LINE_SIZE
 	  line.
 
 config COMPAT
-	def_bool y
+	def_bool n
 	prompt "Kernel support for 31 bit emulation"
 	select ARCH_WANT_OLD_COMPAT_IPC
 	select COMPAT_OLD_SIGACTION
 	select HAVE_UID16
 	depends on MULTIUSER
-	depends on !CC_IS_CLANG
+	depends on !CC_IS_CLANG && !LD_IS_LLD
 	help
 	  Select this option if you want to enable your system kernel to
 	  handle system-calls from ELF binaries for 31 bit ESA.  This option
 	  (and some other stuff like libraries and such) is needed for
-	  executing 31 bit applications.  It is safe to say "Y".
+	  executing 31 bit applications.
+
+	  If unsure say N.
 
 config SMP
 	def_bool y
@@ -458,55 +551,48 @@ config SCHED_SMT
 config SCHED_MC
 	def_bool n
 
-config SCHED_BOOK
-	def_bool n
-
-config SCHED_DRAWER
-	def_bool n
-
 config SCHED_TOPOLOGY
 	def_bool y
 	prompt "Topology scheduler support"
 	select SCHED_SMT
 	select SCHED_MC
-	select SCHED_BOOK
-	select SCHED_DRAWER
 	help
 	  Topology scheduler support improves the CPU scheduler's decision
 	  making when dealing with machines that have multi-threading,
 	  multiple cores or multiple books.
 
-source "kernel/Kconfig.hz"
-
-config KEXEC
+config SCHED_TOPOLOGY_VERTICAL
 	def_bool y
-	select KEXEC_CORE
-
-config KEXEC_FILE
-	bool "kexec file based system call"
-	select KEXEC_CORE
-	depends on CRYPTO
-	depends on CRYPTO_SHA256
-	depends on CRYPTO_SHA256_S390
+	bool "Use vertical CPU polarization by default"
+	depends on SCHED_TOPOLOGY
 	help
-	  Enable the kexec file based system call. In contrast to the normal
-	  kexec system call this system call takes file descriptors for the
-	  kernel and initramfs as arguments.
+	  Use vertical CPU polarization by default if available.
+	  The default CPU polarization is horizontal.
 
-config ARCH_HAS_KEXEC_PURGATORY
+config HIPERDISPATCH_ON
 	def_bool y
-	depends on KEXEC_FILE
+	bool "Use hiperdispatch on vertical polarization by default"
+	depends on SCHED_TOPOLOGY
+	depends on PROC_SYSCTL
+	help
+	  Hiperdispatch aims to improve the CPU scheduler's decision
+	  making when using vertical polarization by adjusting CPU
+	  capacities dynamically. Set this option to use hiperdispatch
+	  on vertical polarization by default. This can be overridden
+	  by sysctl's s390.hiperdispatch attribute later on.
+
+source "kernel/Kconfig.hz"
 
-config KEXEC_SIG
-	bool "Verify kernel signature during kexec_file_load() syscall"
-	depends on KEXEC_FILE && MODULE_SIG_FORMAT
+config CERT_STORE
+	bool "Get user certificates via DIAG320"
+	depends on KEYS
+	select CRYPTO_LIB_SHA256
 	help
-	  This option makes kernel signature verification mandatory for
-	  the kexec_file_load() syscall.
+	  Enable this option if you want to access user-provided secure boot
+	  certificates via DIAG 0x320.
 
-	  In addition to that option, you need to enable signature
-	  verification for the corresponding kernel image type being
-	  loaded in order for this to work.
+	  These certificates will be made available via the keyring named
+	  'cert_store'.
 
 config KERNEL_NOBP
 	def_bool n
@@ -539,17 +625,13 @@ config EXPOLINE
 	  If unsure, say N.
 
 config EXPOLINE_EXTERN
-	def_bool n
-	depends on EXPOLINE
-	depends on CC_IS_GCC && GCC_VERSION >= 110200
-	depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
-	prompt "Generate expolines as extern functions."
+	def_bool EXPOLINE && CC_IS_GCC && GCC_VERSION >= 110200 && \
+		 $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
 	help
-	  This option is required for some tooling like kpatch. The kernel is
-	  compiled with -mindirect-branch=thunk-extern and requires a newer
-	  compiler.
-
-	  If unsure, say N.
+	  Generate expolines as external functions if the compiler supports it.
+	  This option is required for some tooling like kpatch, if expolines
+	  are enabled. The kernel is compiled with
+	  -mindirect-branch=thunk-extern, which requires a newer compiler.
 
 choice
 	prompt "Expoline default"
@@ -568,20 +650,18 @@ config EXPOLINE_FULL
 endchoice
 
 config RELOCATABLE
-	bool "Build a relocatable kernel"
-	default y
+	def_bool y
+	select ARCH_VMLINUX_NEEDS_RELOCS
 	help
 	  This builds a kernel image that retains relocation information
 	  so it can be loaded at an arbitrary address.
-	  The kernel is linked as a position-independent executable (PIE)
-	  and contains dynamic relocations which are processed early in the
-	  bootup process.
 	  The relocations make the kernel image about 15% larger (compressed
 	  10%), but are discarded at runtime.
+	  Note: this option exists only for documentation purposes, please do
+	  not remove it.
 
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image (KASLR)"
-	depends on RELOCATABLE
 	default y
 	help
 	  In support of Kernel Address Space Layout Randomization (KASLR),
@@ -589,6 +669,38 @@ config RANDOMIZE_BASE
 	  as a security feature that deters exploit attempts relying on
 	  knowledge of the location of kernel internals.
 
+config RANDOMIZE_IDENTITY_BASE
+	bool "Randomize the address of the identity mapping base"
+	depends on RANDOMIZE_BASE
+	default DEBUG_VM
+	help
+	  The identity mapping base address is pinned to zero by default.
+	  Allow randomization of that base to expose code that otherwise
+	  silently confuses physical and virtual addresses of data structures.
+	  That does not have any impact on the base address at which the
+	  kernel image is loaded.
+
+	  If unsure, say N.
+
+config KERNEL_IMAGE_BASE
+	hex "Kernel image base address"
+	range 0x100000 0x1FFFFFE0000000 if !KASAN
+	range 0x100000 0x1BFFFFE0000000 if KASAN
+	default 0x3FFE0000000 if !KASAN
+	default 0x7FFFE0000000 if KASAN
+	help
+	  This is the address at which the kernel image is loaded in case
+	  Kernel Address Space Layout Randomization (KASLR) is disabled.
+
+	  In case the Protected virtualization guest support is enabled the
+	  Ultravisor imposes a virtual address limit. If the value of this
+	  option leads to the kernel image exceeding the Ultravisor limit,
+	  this option is ignored and the image is loaded below the limit.
+
+	  If the value of this option leads to the kernel image overlapping
+	  the virtual memory where other data structures are located, this
+	  option is ignored and the image is loaded above the structures.
+
 endmenu
 
 menu "Memory setup"
@@ -611,32 +723,6 @@ config MAX_PHYSMEM_BITS
 	  Increasing the number of bits also increases the kernel image size.
 	  By default 46 bits (64TB) are supported.
 
-config CHECK_STACK
-	def_bool y
-	depends on !VMAP_STACK
-	prompt "Detect kernel stack overflow"
-	help
-	  This option enables the compiler option -mstack-guard and
-	  -mstack-size if they are available. If the compiler supports them
-	  it will emit additional code to each function prolog to trigger
-	  an illegal operation if the kernel stack is about to overflow.
-
-	  Say N if you are unsure.
-
-config STACK_GUARD
-	int "Size of the guard area (128-1024)"
-	range 128 1024
-	depends on CHECK_STACK
-	default "256"
-	help
-	  This allows you to specify the size of the guard area at the lower
-	  end of the kernel stack. If the kernel stack points into the guard
-	  area on function entry an illegal operation is triggered. The size
-	  needs to be a power of 2. Please keep in mind that the size of an
-	  interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
-	  The minimum size for the stack guard should be 256 for 31 bit and
-	  512 for 64 bit.
-
 endmenu
 
 menu "I/O subsystem"
@@ -702,10 +788,38 @@ config EADM_SCH
 	  To compile this driver as a module, choose M here: the
 	  module will be called eadm_sch.
 
+config AP
+	def_tristate y
+	prompt "Support for Adjunct Processors (ap)"
+	help
+	  This driver allows usage of Adjunct Processor (AP) devices via
+	  the ap bus, cards and queues. Supported Adjunct Processors are
+	  the CryptoExpress Cards (CEX).
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ap.
+
+	  If unsure, say Y (default).
+
+config AP_DEBUG
+	def_bool n
+	prompt "Enable debug features for Adjunct Processor (ap) devices"
+	depends on AP
+	help
+	  Say 'Y' here to enable some additional debug features for Adjunct
+	  Processor (ap) devices.
+
+	  There will be some more sysfs attributes displayed for ap queues.
+
+	  Do not enable on production level kernel build.
+
+	  If unsure, say N.
+
 config VFIO_CCW
 	def_tristate n
 	prompt "Support for VFIO-CCW subchannels"
-	depends on S390_CCW_IOMMU && VFIO_MDEV
+	depends on VFIO
+	select VFIO_MDEV
 	help
 	  This driver allows usage of I/O subchannels via VFIO-CCW.
 
@@ -715,8 +829,10 @@ config VFIO_CCW
 config VFIO_AP
 	def_tristate n
 	prompt "VFIO support for AP devices"
-	depends on S390_AP_IOMMU && VFIO_MDEV && KVM
-	depends on ZCRYPT
+	depends on KVM
+	depends on VFIO
+	depends on AP
+	select VFIO_MDEV
 	help
 	  This driver grants access to Adjunct Processor (AP) devices
 	  via the VFIO mediated device interface.
@@ -726,22 +842,6 @@ config VFIO_AP
 
 endmenu
 
-menu "Dump support"
-
-config CRASH_DUMP
-	bool "kernel crash dumps"
-	select KEXEC
-	help
-	  Generate crash dump after being started by kexec.
-	  Crash dump kernels are loaded in the main kernel with kexec-tools
-	  into a specially reserved region and then later executed after
-	  a crash by kdump/kexec.
-	  Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
-	  This option also enables s390 zfcpdump.
-	  See also <file:Documentation/s390/zfcpdump.rst>
-
-endmenu
-
 config CCW
 	def_bool y
 
@@ -751,17 +851,6 @@ config HAVE_PNETID
 
 menu "Virtualization"
 
-config PROTECTED_VIRTUALIZATION_GUEST
-	def_bool n
-	prompt "Protected virtualization guest support"
-	help
-	  Select this option, if you want to be able to run this
-	  kernel as a protected virtualization KVM guest.
-	  Protected virtualization capable machines have a mini hypervisor
-	  located at machine level (an ultravisor). With help of the
-	  Ultravisor, KVM will be able to run "protected" VMs, special
-	  VMs whose memory and management data are unavailable to KVM.
-
 config PFAULT
 	def_bool y
 	prompt "Pseudo page fault support"
@@ -860,13 +949,24 @@ config APPLDATA_NET_SUM
 	  This can also be compiled as a module, which will be called
 	  appldata_net_sum.o.
 
-config S390_HYPFS_FS
+config S390_HYPFS
 	def_bool y
+	prompt "s390 hypervisor information"
+	help
+	  This provides several binary files at (debugfs)/s390_hypfs/ to
+	  provide accounting information in an s390 hypervisor environment.
+
+config S390_HYPFS_FS
+	def_bool n
 	prompt "s390 hypervisor file system support"
 	select SYS_HYPERVISOR
+	depends on S390_HYPFS
 	help
 	  This is a virtual file system intended to provide accounting
-	  information in an s390 hypervisor environment.
+	  information in an s390 hypervisor environment. This file system
+	  is deprecated and should not be used.
+
+	  Say N if you are unsure.
 
 source "arch/s390/kvm/Kconfig"
 
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index c4300ea4abf8..7955d7eee7d8 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -13,6 +13,16 @@ config DEBUG_ENTRY
 
 	  If unsure, say N.
 
+config STRICT_MM_TYPECHECKS
+	bool "Strict Memory Management Type Checks"
+	depends on DEBUG_KERNEL
+	help
+	  Enable strict type checking for memory management types like pte_t
+	  and pmd_t. This generates slightly worse code and should be used
+	  for debug builds.
+
+	  If unsure, say N.
+
 config CIO_INJECT
 	bool "CIO Inject interfaces"
 	depends on DEBUG_KERNEL && DEBUG_FS
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 4cb5d17e7ead..7679bc16b692 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,28 +14,29 @@ KBUILD_AFLAGS_MODULE += -fPIC
 KBUILD_CFLAGS_MODULE += -fPIC
 KBUILD_AFLAGS	+= -m64
 KBUILD_CFLAGS	+= -m64
-ifeq ($(CONFIG_RELOCATABLE),y)
-KBUILD_CFLAGS	+= -fPIE
-LDFLAGS_vmlinux	:= -pie
-endif
+KBUILD_CFLAGS	+= -fPIC
+LDFLAGS_vmlinux	:= $(call ld-option,-no-pie)
+extra_tools	:= relocs
 aflags_dwarf	:= -Wa,-gdwarf-2
 KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
 ifndef CONFIG_AS_IS_LLVM
 KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
 endif
-KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11
 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
+KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
+KBUILD_CFLAGS_DECOMPRESSOR += -fPIE
 KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
 
 UTS_MACHINE	:= s390x
-STACK_SIZE	:= $(if $(CONFIG_KASAN),65536,16384)
+STACK_SIZE	:= $(if $(CONFIG_KASAN),65536,$(if $(CONFIG_KMSAN),65536,16384))
 CHECKFLAGS	+= -D__s390__ -D__s390x__
 
 export LD_BFD
@@ -47,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13)    := -march=z13
 mflags-$(CONFIG_MARCH_Z14)    := -march=z14
 mflags-$(CONFIG_MARCH_Z15)    := -march=z15
 mflags-$(CONFIG_MARCH_Z16)    := -march=z16
+mflags-$(CONFIG_MARCH_Z17)    := -march=z17
 
 export CC_FLAGS_MARCH := $(mflags-y)
 
@@ -60,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE)		+= -mtune=z13
 cflags-$(CONFIG_MARCH_Z14_TUNE)		+= -mtune=z14
 cflags-$(CONFIG_MARCH_Z15_TUNE)		+= -mtune=z15
 cflags-$(CONFIG_MARCH_Z16_TUNE)		+= -mtune=z16
+cflags-$(CONFIG_MARCH_Z17_TUNE)		+= -mtune=z17
 
 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 
@@ -71,18 +74,8 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
 KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
 KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
 
-ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
-  CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE)
-  ifeq ($(call cc-option,-mstack-size=8192),)
-    CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD)
-  endif
-  export CC_FLAGS_CHECK_STACK
-  cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK)
-endif
-
 ifdef CONFIG_EXPOLINE
   ifdef CONFIG_EXPOLINE_EXTERN
-    KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
     CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
     CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
   else
@@ -119,10 +112,7 @@ export KBUILD_CFLAGS_DECOMPRESSOR
 
 OBJCOPYFLAGS	:= -O binary
 
-head-y		:= arch/s390/kernel/head64.o
-
 libs-y		+= arch/s390/lib/
-drivers-y	+= drivers/s390/
 
 boot		:= arch/s390/boot
 syscalls	:= arch/s390/kernel/syscalls
@@ -142,15 +132,12 @@ bzImage: vmlinux
 zfcpdump:
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-vdso_install:
-	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
-
 archheaders:
 	$(Q)$(MAKE) $(build)=$(syscalls) uapi
 
 archprepare:
 	$(Q)$(MAKE) $(build)=$(syscalls) kapi
-	$(Q)$(MAKE) $(build)=$(tools) kapi
+	$(Q)$(MAKE) $(build)=$(tools) kapi $(extra_tools)
 ifeq ($(KBUILD_EXTMOD),)
 # We need to generate vdso-offsets.h before compiling certain files in kernel/.
 # In order to do that, we should use the archprepare target, but we can't since
@@ -164,11 +151,9 @@ vdso_prepare: prepare0
 	$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
 		$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
 
-ifdef CONFIG_EXPOLINE_EXTERN
-modules_prepare: expoline_prepare
-expoline_prepare:
-	$(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
-endif
+vdso-install-y			+= arch/s390/kernel/vdso64/vdso64.so.dbg
+vdso-install-$(CONFIG_COMPAT)	+= arch/s390/kernel/vdso32/vdso32.so.dbg
+
 endif
 
 # Don't use tabs in echo arguments
diff --git a/arch/s390/Makefile.postlink b/arch/s390/Makefile.postlink
new file mode 100644
index 000000000000..c2b737500a91
--- /dev/null
+++ b/arch/s390/Makefile.postlink
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link s390 pass
+# ===========================================================================
+#
+# Extract relocations from vmlinux.unstripped into relocs.S.
+# (Stripping them from the image is not done by this makefile.)
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+CMD_RELOCS=arch/s390/tools/relocs
+OUT_RELOCS = arch/s390/boot
+quiet_cmd_relocs = RELOCS  $(OUT_RELOCS)/relocs.S
+      cmd_relocs = \
+	mkdir -p $(OUT_RELOCS); \
+	$(CMD_RELOCS) $@ > $(OUT_RELOCS)/relocs.S
+
+vmlinux.unstripped: FORCE
+	$(call cmd,relocs)
+
+clean:
+	@rm -f $(OUT_RELOCS)/relocs.S
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index d74a4c7d5df6..dd7ba7587dd5 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -26,12 +26,10 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/workqueue.h>
-#include <linux/suspend.h>
-#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 #include <asm/appldata.h>
 #include <asm/vtimer.h>
-#include <linux/uaccess.h>
-#include <asm/io.h>
 #include <asm/smp.h>
 
 #include "appldata.h"
@@ -44,19 +42,17 @@
 #define TOD_MICRO	0x01000			/* nr. of TOD clock units
 						   for 1 microsecond */
 
-static struct platform_device *appldata_pdev;
-
 /*
  * /proc entries (sysctl)
  */
 static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(struct ctl_table *ctl, int write,
+static int appldata_timer_handler(const struct ctl_table *ctl, int write,
 				  void *buffer, size_t *lenp, loff_t *ppos);
-static int appldata_interval_handler(struct ctl_table *ctl, int write,
+static int appldata_interval_handler(const struct ctl_table *ctl, int write,
 				     void *buffer, size_t *lenp, loff_t *ppos);
 
 static struct ctl_table_header *appldata_sysctl_header;
-static struct ctl_table appldata_table[] = {
+static const struct ctl_table appldata_table[] = {
 	{
 		.procname	= "timer",
 		.mode		= S_IRUGO | S_IWUSR,
@@ -67,17 +63,6 @@ static struct ctl_table appldata_table[] = {
 		.mode		= S_IRUGO | S_IWUSR,
 		.proc_handler	= appldata_interval_handler,
 	},
-	{ },
-};
-
-static struct ctl_table appldata_dir_table[] = {
-	{
-		.procname	= appldata_proc_name,
-		.maxlen		= 0,
-		.mode		= S_IRUGO | S_IXUGO,
-		.child		= appldata_table,
-	},
-	{ },
 };
 
 /*
@@ -88,7 +73,6 @@ static struct vtimer_list appldata_timer;
 static DEFINE_SPINLOCK(appldata_timer_lock);
 static int appldata_interval = APPLDATA_CPU_INTERVAL;
 static int appldata_timer_active;
-static int appldata_timer_suspended = 0;
 
 /*
  * Work queue
@@ -215,7 +199,7 @@ static void __appldata_vtimer_setup(int cmd)
  * Start/Stop timer, show status of timer (0 = not active, 1 = active)
  */
 static int
-appldata_timer_handler(struct ctl_table *ctl, int write,
+appldata_timer_handler(const struct ctl_table *ctl, int write,
 			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int timer_active = appldata_timer_active;
@@ -248,7 +232,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write,
  * current timer interval.
  */
 static int
-appldata_interval_handler(struct ctl_table *ctl, int write,
+appldata_interval_handler(const struct ctl_table *ctl, int write,
 			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int interval = appldata_interval;
@@ -278,7 +262,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write,
  * monitoring (0 = not in process, 1 = in process)
  */
 static int
-appldata_generic_handler(struct ctl_table *ctl, int write,
+appldata_generic_handler(const struct ctl_table *ctl, int write,
 			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct appldata_ops *ops = NULL, *tmp_ops;
@@ -296,7 +280,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write,
 	mutex_lock(&appldata_ops_mutex);
 	list_for_each(lh, &appldata_ops_list) {
 		tmp_ops = list_entry(lh, struct appldata_ops, list);
-		if (&tmp_ops->ctl_table[2] == ctl) {
+		if (&tmp_ops->ctl_table[0] == ctl) {
 			found = 1;
 		}
 	}
@@ -366,7 +350,7 @@ int appldata_register_ops(struct appldata_ops *ops)
 	if (ops->size > APPLDATA_MAX_REC_SIZE)
 		return -EINVAL;
 
-	ops->ctl_table = kcalloc(4, sizeof(struct ctl_table), GFP_KERNEL);
+	ops->ctl_table = kcalloc(1, sizeof(struct ctl_table), GFP_KERNEL);
 	if (!ops->ctl_table)
 		return -ENOMEM;
 
@@ -374,17 +358,12 @@ int appldata_register_ops(struct appldata_ops *ops)
 	list_add(&ops->list, &appldata_ops_list);
 	mutex_unlock(&appldata_ops_mutex);
 
-	ops->ctl_table[0].procname = appldata_proc_name;
-	ops->ctl_table[0].maxlen   = 0;
-	ops->ctl_table[0].mode     = S_IRUGO | S_IXUGO;
-	ops->ctl_table[0].child    = &ops->ctl_table[2];
-
-	ops->ctl_table[2].procname = ops->name;
-	ops->ctl_table[2].mode     = S_IRUGO | S_IWUSR;
-	ops->ctl_table[2].proc_handler = appldata_generic_handler;
-	ops->ctl_table[2].data = ops;
+	ops->ctl_table[0].procname = ops->name;
+	ops->ctl_table[0].mode = S_IRUGO | S_IWUSR;
+	ops->ctl_table[0].proc_handler = appldata_generic_handler;
+	ops->ctl_table[0].data = ops;
 
-	ops->sysctl_header = register_sysctl_table(ops->ctl_table);
+	ops->sysctl_header = register_sysctl_sz(appldata_proc_name, ops->ctl_table, 1);
 	if (!ops->sysctl_header)
 		goto out;
 	return 0;
@@ -412,88 +391,6 @@ void appldata_unregister_ops(struct appldata_ops *ops)
 /********************** module-ops management <END> **************************/
 
 
-/**************************** suspend / resume *******************************/
-static int appldata_freeze(struct device *dev)
-{
-	struct appldata_ops *ops;
-	int rc;
-	struct list_head *lh;
-
-	spin_lock(&appldata_timer_lock);
-	if (appldata_timer_active) {
-		__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
-		appldata_timer_suspended = 1;
-	}
-	spin_unlock(&appldata_timer_lock);
-
-	mutex_lock(&appldata_ops_mutex);
-	list_for_each(lh, &appldata_ops_list) {
-		ops = list_entry(lh, struct appldata_ops, list);
-		if (ops->active == 1) {
-			rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
-					(unsigned long) ops->data, ops->size,
-					ops->mod_lvl);
-			if (rc != 0)
-				pr_err("Stopping the data collection for %s "
-				       "failed with rc=%d\n", ops->name, rc);
-		}
-	}
-	mutex_unlock(&appldata_ops_mutex);
-	return 0;
-}
-
-static int appldata_restore(struct device *dev)
-{
-	struct appldata_ops *ops;
-	int rc;
-	struct list_head *lh;
-
-	spin_lock(&appldata_timer_lock);
-	if (appldata_timer_suspended) {
-		__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
-		appldata_timer_suspended = 0;
-	}
-	spin_unlock(&appldata_timer_lock);
-
-	mutex_lock(&appldata_ops_mutex);
-	list_for_each(lh, &appldata_ops_list) {
-		ops = list_entry(lh, struct appldata_ops, list);
-		if (ops->active == 1) {
-			ops->callback(ops->data);	// init record
-			rc = appldata_diag(ops->record_nr,
-					APPLDATA_START_INTERVAL_REC,
-					(unsigned long) ops->data, ops->size,
-					ops->mod_lvl);
-			if (rc != 0) {
-				pr_err("Starting the data collection for %s "
-				       "failed with rc=%d\n", ops->name, rc);
-			}
-		}
-	}
-	mutex_unlock(&appldata_ops_mutex);
-	return 0;
-}
-
-static int appldata_thaw(struct device *dev)
-{
-	return appldata_restore(dev);
-}
-
-static const struct dev_pm_ops appldata_pm_ops = {
-	.freeze		= appldata_freeze,
-	.thaw		= appldata_thaw,
-	.restore	= appldata_restore,
-};
-
-static struct platform_driver appldata_pdrv = {
-	.driver = {
-		.name	= "appldata",
-		.pm	= &appldata_pm_ops,
-	},
-};
-/************************* suspend / resume <END> ****************************/
-
-
 /******************************* init / exit *********************************/
 
 /*
@@ -503,36 +400,14 @@ static struct platform_driver appldata_pdrv = {
  */
 static int __init appldata_init(void)
 {
-	int rc;
-
 	init_virt_timer(&appldata_timer);
 	appldata_timer.function = appldata_timer_function;
 	appldata_timer.data = (unsigned long) &appldata_work;
-
-	rc = platform_driver_register(&appldata_pdrv);
-	if (rc)
-		return rc;
-
-	appldata_pdev = platform_device_register_simple("appldata", -1, NULL,
-							0);
-	if (IS_ERR(appldata_pdev)) {
-		rc = PTR_ERR(appldata_pdev);
-		goto out_driver;
-	}
 	appldata_wq = alloc_ordered_workqueue("appldata", 0);
-	if (!appldata_wq) {
-		rc = -ENOMEM;
-		goto out_device;
-	}
-
-	appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
+	if (!appldata_wq)
+		return -ENOMEM;
+	appldata_sysctl_header = register_sysctl(appldata_proc_name, appldata_table);
 	return 0;
-
-out_device:
-	platform_device_unregister(appldata_pdev);
-out_driver:
-	platform_driver_unregister(&appldata_pdrv);
-	return rc;
 }
 
 __initcall(appldata_init);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 21c3147bd92a..fc608f9b79ab 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -15,7 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include "appldata.h"
 
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index f56591bc0897..af2a6a7bc028 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
 image
 bzImage
+relocs.S
 section_cmp.*
 vmlinux
 vmlinux.lds
+vmlinux.map
 vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 883357a211a3..bee49626be4b 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -3,61 +3,53 @@
 # Makefile for the linux s390-specific parts of the memory manager.
 #
 
+# Tooling runtimes are unavailable and cannot be linked for early boot code
 KCOV_INSTRUMENT := n
 GCOV_PROFILE := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
 KCSAN_SANITIZE := n
-
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
-KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+KMSAN_SANITIZE := n
 
 #
-# Use minimum architecture for als.c to be able to print an error
+# Use minimum architecture level so it is possible to print an error
 # message if the kernel is started on a machine which is too old
 #
-ifndef CONFIG_CC_IS_CLANG
-CC_FLAGS_MARCH_MINIMUM := -march=z900
-else
 CC_FLAGS_MARCH_MINIMUM := -march=z10
-endif
-
-ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM))
-AFLAGS_REMOVE_head.o		+= $(CC_FLAGS_MARCH)
-AFLAGS_head.o			+= $(CC_FLAGS_MARCH_MINIMUM)
-AFLAGS_REMOVE_mem.o		+= $(CC_FLAGS_MARCH)
-AFLAGS_mem.o			+= $(CC_FLAGS_MARCH_MINIMUM)
-CFLAGS_REMOVE_als.o		+= $(CC_FLAGS_MARCH)
-CFLAGS_als.o			+= $(CC_FLAGS_MARCH_MINIMUM)
-CFLAGS_REMOVE_sclp_early_core.o	+= $(CC_FLAGS_MARCH)
-CFLAGS_sclp_early_core.o	+= $(CC_FLAGS_MARCH_MINIMUM)
-endif
+
+KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR))
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR))
+KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
+KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
 
 CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
 
-obj-y	:= head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y	:= head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
 obj-y	+= string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y	+= version.o pgm_check_info.o ctype.o ipl_data.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
-obj-$(CONFIG_RELOCATABLE)	+= machine_kexec_reloc.o
+obj-y	+= version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o
+obj-y	+= uv.o printk.o
 obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 obj-y	+= $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
 obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-$(CONFIG_KMSAN) += kmsan.o
 obj-all := $(obj-y) piggy.o syms.o
 
 targets	:= bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
 targets	+= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
 targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
+targets += relocs.S
 
 OBJECTS := $(addprefix $(obj)/,$(obj-y))
 OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
 
+clean-files += vmlinux.map
+
 quiet_cmd_section_cmp = SECTCMP $*
 define cmd_section_cmp
-	s1=`$(OBJDUMP) -t -j "$*" "$<" | sort | \
+	s1=`$(OBJDUMP) -t "$<" | grep "\s$*\s\+" | sort | \
 		sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
-	s2=`$(OBJDUMP) -t -j "$*" "$(word 2,$^)" | sort | \
+	s2=`$(OBJDUMP) -t "$(word 2,$^)" | grep "\s$*\s\+" | sort | \
 		sed -n "/0000000000000000/! s/.*\s$*\s\+//p" | sha256sum`; \
 	if [ "$$s1" != "$$s2" ]; then \
 		echo "error: section $* differs between $< and $(word 2,$^)" >&2; \
@@ -72,11 +64,12 @@ $(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.b
 $(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
 	$(call if_changed,section_cmp)
 
-LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
+LDFLAGS_vmlinux-$(CONFIG_LD_ORPHAN_WARN) := --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
+LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup $(if $(CONFIG_VMLINUX_MAP),-Map=$(obj)/vmlinux.map) --build-id=sha1 -T
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
 	$(call if_changed,ld)
 
-LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
+LDFLAGS_vmlinux.syms := $(LDFLAGS_vmlinux-y) --oformat $(LD_BFD) -e startup -T
 $(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
 	$(call if_changed,ld)
 
@@ -92,7 +85,7 @@ OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .
 $(obj)/syms.o: $(obj)/syms.bin FORCE
 	$(call if_changed,objcopy)
 
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=alloc,load
 $(obj)/info.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
@@ -104,6 +97,10 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
+# relocs.S is created by the vmlinux postlink step.
+$(obj)/relocs.S: vmlinux
+	@true
+
 suffix-$(CONFIG_KERNEL_GZIP)  := .gz
 suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
 suffix-$(CONFIG_KERNEL_LZ4)  := .lz4
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 47c48fbfb563..79afb5fa7f1f 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -9,42 +9,8 @@
 #include <asm/sclp.h>
 #include "boot.h"
 
-/*
- * The code within this file will be called very early. It may _not_
- * access anything within the bss section, since that is not cleared
- * yet and may contain data (e.g. initrd) that must be saved by other
- * code.
- * For temporary objects the stack (16k) should be used.
- */
-
 static unsigned long als[] = { FACILITIES_ALS };
 
-static void u16_to_hex(char *str, u16 val)
-{
-	int i, num;
-
-	for (i = 1; i <= 4; i++) {
-		num = (val >> (16 - 4 * i)) & 0xf;
-		if (num >= 10)
-			num += 7;
-		*str++ = '0' + num;
-	}
-	*str = '\0';
-}
-
-static void print_machine_type(void)
-{
-	static char mach_str[80] = "Detected machine-type number: ";
-	char type_str[5];
-	struct cpuid id;
-
-	get_cpu_id(&id);
-	u16_to_hex(type_str, id.machine);
-	strcat(mach_str, type_str);
-	strcat(mach_str, "\n");
-	sclp_early_printk(mach_str);
-}
-
 static void u16_to_decimal(char *str, u16 val)
 {
 	int div = 1;
@@ -80,8 +46,7 @@ void print_missing_facilities(void)
 			 * z/VM adds a four character prefix.
 			 */
 			if (strlen(als_str) > 70) {
-				strcat(als_str, "\n");
-				sclp_early_printk(als_str);
+				boot_emerg("%s\n", als_str);
 				*als_str = '\0';
 			}
 			u16_to_decimal(val_str, i * BITS_PER_LONG + j);
@@ -89,16 +54,18 @@ void print_missing_facilities(void)
 			first = 0;
 		}
 	}
-	strcat(als_str, "\n");
-	sclp_early_printk(als_str);
+	boot_emerg("%s\n", als_str);
 }
 
 static void facility_mismatch(void)
 {
-	sclp_early_printk("The Linux kernel requires more recent processor hardware\n");
-	print_machine_type();
+	struct cpuid id;
+
+	get_cpu_id(&id);
+	boot_emerg("The Linux kernel requires more recent processor hardware\n");
+	boot_emerg("Detected machine-type number: %4x\n", id.machine);
 	print_missing_facilities();
-	sclp_early_printk("See Principles of Operations for facility bits\n");
+	boot_emerg("See Principles of Operations for facility bits\n");
 	disabled_wait();
 }
 
diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c
new file mode 100644
index 000000000000..19ea7934b918
--- /dev/null
+++ b/arch/s390/boot/alternative.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt)	"alt: " fmt
+#include "boot.h"
+
+#define a_debug		boot_debug
+
+#include "../kernel/alternative.c"
+
+/* Turn on debug output for every alternative of the given type. */
+static void alt_debug_all(int type)
+{
+	int idx;
+
+	if (type == ALT_TYPE_FACILITY) {
+		/* All bits set: no facility number is filtered out */
+		for (idx = 0; idx < ARRAY_SIZE(alt_debug.facilities); idx++)
+			alt_debug.facilities[idx] = -1UL;
+	} else if (type == ALT_TYPE_FEATURE) {
+		/* All bits set: no machine feature is filtered out */
+		for (idx = 0; idx < ARRAY_SIZE(alt_debug.mfeatures); idx++)
+			alt_debug.mfeatures[idx] = -1UL;
+	} else if (type == ALT_TYPE_SPEC) {
+		/* ALT_TYPE_SPEC is tracked by a single flag, not a mask */
+		alt_debug.spec = 1;
+	}
+}
+
+/* Select (or, with @clear, deselect) number @nr in the debug mask of @type. */
+static void alt_debug_modify(int type, unsigned int nr, bool clear)
+{
+	if (type == ALT_TYPE_FACILITY) {
+		if (!clear)
+			__set_facility(nr, alt_debug.facilities);
+		else
+			__clear_facility(nr, alt_debug.facilities);
+		return;
+	}
+	if (type != ALT_TYPE_FEATURE)
+		return;	/* remaining types are not handled per number here */
+	if (!clear)
+		__set_machine_feature(nr, alt_debug.mfeatures);
+	else
+		__clear_machine_feature(nr, alt_debug.mfeatures);
+}
+
+/* Parse an optional ":[!]n[-m][,...]" selection for @type; returns the new position. */
+static char *alt_debug_parse(int type, char *str)
+{
+	unsigned long val, endval;
+	char *endp;
+	bool clear;
+
+	if (*str == ':') {
+		str++;
+	} else {
+		alt_debug_all(type);
+		return str;
+	}
+	clear = false;
+	if (*str == '!') {
+		alt_debug_all(type);
+		clear = true;
+		str++;
+	}
+	while (*str) {
+		val = simple_strtoull(str, &endp, 0);
+		if (str == endp)
+			break;
+		str = endp;
+		if (*str == '-') {
+			str++;
+			endval = simple_strtoull(str, &endp, 0);
+			if (str == endp)
+				break;
+			str = endp;
+			while (val <= endval) {
+				alt_debug_modify(type, val, clear);
+				val++;
+			}
+		} else {
+			alt_debug_modify(type, val, clear);
+		}
+		if (*str != ',')
+			break;
+		str++;
+	}
+	return str;
+}
+
+/*
+ * Use debug-alternative command line parameter for debugging:
+ * "debug-alternative"
+ * -> print debug message for every single alternative
+ *
+ * "debug-alternative=0;2"
+ * -> print debug message for all alternatives with type 0 and 2
+ *
+ * "debug-alternative=0:0-7"
+ * -> print debug message for all alternatives with type 0 and with
+ *    facility numbers within the range of 0-7
+ *    (if type 0 is ALT_TYPE_FACILITY)
+ *
+ * "debug-alternative=0:!8;1"
+ * -> print debug message for all alternatives with type 0, for all
+ *    facility numbers, except facility 8, and in addition print all
+ *    alternatives with type 1
+ */
+void alt_debug_setup(char *str)
+{
+	unsigned long type;
+	char *endp;
+
+	if (!str) {
+		/* Bare "debug-alternative": enable debugging for every type */
+		alt_debug_all(ALT_TYPE_FACILITY);
+		alt_debug_all(ALT_TYPE_FEATURE);
+		alt_debug_all(ALT_TYPE_SPEC);
+		return;
+	}
+	while (*str) {
+		type = simple_strtoull(str, &endp, 0);
+		if (str == endp)
+			break;
+		str = endp;
+		switch (type) {
+		case ALT_TYPE_FACILITY:
+		case ALT_TYPE_FEATURE:
+			str = alt_debug_parse(type, str);
+			break;
+		case ALT_TYPE_SPEC:
+			alt_debug_all(ALT_TYPE_SPEC);
+			break;
+		}
+		if (*str != ';')
+			break;
+		str++;
+	}
+}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 70418389414d..e045cae6e80a 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,31 +8,118 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/printk.h>
+#include <asm/physmem_info.h>
+
+struct vmlinux_info {
+	unsigned long entry;
+	unsigned long image_size;	/* does not include .bss */
+	unsigned long bss_size;		/* uncompressed image .bss size */
+	unsigned long bootdata_off;
+	unsigned long bootdata_size;
+	unsigned long bootdata_preserved_off;
+	unsigned long bootdata_preserved_size;
+	unsigned long got_start;
+	unsigned long got_end;
+	unsigned long amode31_size;
+	unsigned long init_mm_off;
+	unsigned long swapper_pg_dir_off;
+	unsigned long invalid_pg_dir_off;
+	unsigned long alt_instructions;
+	unsigned long alt_instructions_end;
+#ifdef CONFIG_KASAN
+	unsigned long kasan_early_shadow_page_off;
+	unsigned long kasan_early_shadow_pte_off;
+	unsigned long kasan_early_shadow_pmd_off;
+	unsigned long kasan_early_shadow_pud_off;
+	unsigned long kasan_early_shadow_p4d_off;
+#endif
+};
+
 void startup_kernel(void);
-unsigned long detect_memory(void);
+unsigned long detect_max_physmem_end(void);
+void detect_physmem_online_ranges(unsigned long max_physmem_end);
+void physmem_set_usable_limit(unsigned long limit);
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
+void physmem_free(enum reserved_range_type type);
+/* for continuous/multiple allocations per type */
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+				   unsigned long align);
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+			    unsigned long align, bool die_on_oom);
+/* for single allocations, 1 per type */
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+				  unsigned long align, unsigned long min, unsigned long max,
+				  bool die_on_oom);
+unsigned long get_physmem_alloc_pos(void);
+void dump_physmem_reserved(void);
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+				 unsigned long *intersection_start);
 bool is_ipl_block_dump(void);
 void store_ipl_parmblock(void);
+int read_ipl_report(void);
+void save_ipl_cert_comp_list(void);
 void setup_boot_command_line(void);
 void parse_boot_command_line(void);
 void verify_facilities(void);
 void print_missing_facilities(void);
 void sclp_early_setup_buffer(void);
-void print_pgm_check_info(void);
-unsigned long get_random_base(unsigned long safe_addr);
-void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void alt_debug_setup(char *str);
+void do_pgm_check(struct pt_regs *regs);
+unsigned long randomize_within_range(unsigned long size, unsigned long align,
+				     unsigned long min, unsigned long max);
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
+int __printf(1, 2) boot_printk(const char *fmt, ...);
+void print_stacktrace(unsigned long sp);
+void error(char *m);
+int get_random(unsigned long limit, unsigned long *value);
+void boot_rb_dump(void);
+
+#ifndef boot_fmt
+#define boot_fmt(fmt)	fmt
+#endif
+
+#define boot_emerg(fmt, ...)	boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_alert(fmt, ...)	boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_crit(fmt, ...)	boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_err(fmt, ...)	boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_warn(fmt, ...)	boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_notice(fmt, ...)	boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_info(fmt, ...)	boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_debug(fmt, ...)	boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__)
+
+extern struct machine_info machine;
+extern int boot_console_loglevel;
+extern bool boot_ignore_loglevel;
 
 /* Symbols defined by linker scripts */
 extern const char kernel_version[];
 extern unsigned long memory_limit;
 extern unsigned long vmalloc_size;
 extern int vmalloc_size_set;
-extern int kaslr_enabled;
 extern char __boot_data_start[], __boot_data_end[];
 extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
+extern char __vmlinux_relocs_64_start[], __vmlinux_relocs_64_end[];
 extern char _decompressor_syms_start[], _decompressor_syms_end[];
 extern char _stack_start[], _stack_end[];
+extern char _end[], _decompressor_end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
+extern struct vmlinux_info _vmlinux_info;
+
+#define vmlinux _vmlinux_info
 
-unsigned long read_ipl_report(unsigned long safe_offset);
+#define __lowcore_pa(x)		((unsigned long)(x) % sizeof(struct lowcore))
+#define __abs_lowcore_pa(x)	(((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
+#define __kernel_va(x)		((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
+#define __kernel_pa(x)		((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)
+#define __identity_va(x)	((void *)((unsigned long)(x) + __identity_base))
+#define __identity_pa(x)	((unsigned long)(x) - __identity_base)
 
+static inline bool intersects(unsigned long addr0, unsigned long size0,
+			      unsigned long addr1, unsigned long size1)
+{
+	return addr0 + size0 > addr1 && addr1 + size1 > addr0;
+}
 #endif /* __ASSEMBLY__ */
 #endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index e27c2140d620..03500b9d9fb9 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -9,8 +9,10 @@
 
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <asm/boot_data.h>
 #include <asm/page.h>
 #include "decompressor.h"
+#include "boot.h"
 
 /*
  * gzip declarations
@@ -23,9 +25,9 @@
 #define memmove memmove
 #define memzero(s, n) memset((s), 0, (n))
 
-#ifdef CONFIG_KERNEL_BZIP2
+#if defined(CONFIG_KERNEL_BZIP2)
 #define BOOT_HEAP_SIZE	0x400000
-#elif CONFIG_KERNEL_ZSTD
+#elif defined(CONFIG_KERNEL_ZSTD)
 #define BOOT_HEAP_SIZE	0x30000
 #else
 #define BOOT_HEAP_SIZE	0x10000
@@ -62,24 +64,22 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
 #include "../../../../lib/decompress_unzstd.c"
 #endif
 
-#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
+static void decompress_error(char *m)
+{
+	if (bootdebug)
+		boot_rb_dump();
+	boot_emerg("Decompression error: %s\n", m);
+	boot_emerg(" -- System halted\n");
+	disabled_wait();
+}
 
 unsigned long mem_safe_offset(void)
 {
-	/*
-	 * due to 4MB HEAD_SIZE for bzip2
-	 * 'decompress_offset + vmlinux.image_size' could be larger than
-	 * kernel at final position + its .bss, so take the larger of two
-	 */
-	return max(decompress_offset + vmlinux.image_size,
-		   vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size);
+	return ALIGN(free_mem_end_ptr, PAGE_SIZE);
 }
 
-void *decompress_kernel(void)
+void deploy_kernel(void *output)
 {
-	void *output = (void *)decompress_offset;
-
 	__decompress(_compressed_start, _compressed_end - _compressed_start,
-		     NULL, NULL, output, 0, NULL, error);
-	return output;
+		     NULL, NULL, output, vmlinux.image_size, NULL, decompress_error);
 }
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
index f75cc31a77dd..4f966f06bd65 100644
--- a/arch/s390/boot/decompressor.h
+++ b/arch/s390/boot/decompressor.h
@@ -2,37 +2,9 @@
 #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
 #define BOOT_COMPRESSED_DECOMPRESSOR_H
 
-#include <linux/stddef.h>
-
-#ifdef CONFIG_KERNEL_UNCOMPRESSED
-static inline void *decompress_kernel(void) { return NULL; }
-#else
-void *decompress_kernel(void);
-#endif
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
 unsigned long mem_safe_offset(void);
-void error(char *m);
-
-struct vmlinux_info {
-	unsigned long default_lma;
-	void (*entry)(void);
-	unsigned long image_size;	/* does not include .bss */
-	unsigned long bss_size;		/* uncompressed image .bss size */
-	unsigned long bootdata_off;
-	unsigned long bootdata_size;
-	unsigned long bootdata_preserved_off;
-	unsigned long bootdata_preserved_size;
-	unsigned long dynsym_start;
-	unsigned long rela_dyn_start;
-	unsigned long rela_dyn_end;
-	unsigned long amode31_size;
-};
-
-/* Symbols defined by linker scripts */
-extern char _end[];
-extern unsigned char _compressed_start[];
-extern unsigned char _compressed_end[];
-extern char _vmlinux_info[];
-
-#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)
+void deploy_kernel(void *output);
+#endif
 
 #endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 3f79b9efb803..0b511d5c030b 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -67,7 +67,7 @@ ipl_start:
 	jz	.Lagain1		# skip dataset header
 	larl	%r13,.L_eof
 	clc	0(3,%r4),0(%r13)	# if it is EOFx
-	jz	.Lagain1		# skip dateset trailer
+	jz	.Lagain1		# skip data set trailer
 	lgr	%r5,%r2
 	la	%r6,COMMAND_LINE-PARMAREA(%r12)
 	lgr	%r7,%r2
@@ -185,19 +185,19 @@ ipl_start:
 	larl	%r13,.Lcrash
 	lpsw	0(%r13)
 
-	.align	8
+	.balign	8
 .Lwaitpsw:
 	.quad	0x0202000180000000,.Lioint
 .Lnewpswmask:
 	.quad	0x0000000180000000
-	.align	8
+	.balign	8
 .Lorb:	.long	0x00000000,0x0080ff00,.Lccws
 .Lirb:	.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-	.align	8
+	.balign	8
 .Lcr6:	.quad	0x00000000ff000000
-	.align	8
+	.balign	8
 .Lcrash:.long	0x000a0000,0x00000000
-	.align	8
+	.balign	8
 .Lccws: .rept	19
 	.long	0x02600050,0x00000000
 	.endr
@@ -207,7 +207,7 @@ ipl_start:
 	.byte	0xc8,0xd6,0xd3,0xc4	# "change rdr all keep nohold"
 .L_eof: .long	0xc5d6c600	 /* C'EOF' */
 .L_hdr: .long	0xc8c4d900	 /* C'HDR' */
-	.align	8
+	.balign	8
 .Lcpuid:.fill	8,1,0
 
 #
@@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal)
 	xc	0xf00(256),0xf00
 	larl	%r13,.Lctl
 	lctlg	%c0,%c15,0(%r13)	# load control registers
-	stcke	__LC_BOOT_CLOCK
-	mvc	__LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
+	larl	%r13,tod_clock_base
+	stcke	0(%r13)
+	mvc	__LC_LAST_UPDATE_CLOCK(8),1(%r13)
 	larl	%r13,6f
 	spt	0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),0(%r13)
@@ -265,7 +266,7 @@ SYM_CODE_START_LOCAL(startup_normal)
 	brasl	%r14,startup_kernel
 SYM_CODE_END(startup_normal)
 
-	.align	8
+	.balign	8
 6:	.long	0x7fffffff,0xffffffff
 .Lext_new_psw:
 	.quad	0x0002000180000000,0x1b0	# disabled wait
@@ -292,18 +293,12 @@ SYM_CODE_END(startup_normal)
 
 #include "head_kdump.S"
 
-#
-# This program check is active immediately after kernel start
-# and until early_pgm_check_handler is set in kernel/early.c
-# It simply saves general/control registers and psw in
-# the save area and does disabled wait with a faulty address.
-#
 SYM_CODE_START_LOCAL(startup_pgm_check_handler)
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	stmg	%r8,%r15,__LC_SAVE_AREA
 	la	%r8,4095
 	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8)
 	stmg	%r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8)
-	mvc	__LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC
+	mvc	__LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA
 	mvc	__LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW
 	mvc	__LC_RETURN_PSW(16),__LC_PGM_OLD_PSW
 	ni	__LC_RETURN_PSW,0xfc	# remove IO and EX bits
@@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
 	oi	__LC_RETURN_PSW+1,0x2	# set wait state bit
 	larl	%r9,.Lold_psw_disabled_wait
 	stg	%r9,__LC_PGM_NEW_PSW+8
-	larl	%r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD
-	brasl	%r14,print_pgm_check_info
+	larl	%r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+	la	%r2,STACK_FRAME_OVERHEAD(%r15)
+	mvc	__PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8)
+	mvc	__PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8)
+	mvc	__PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK
+	mvc	__PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE
+	brasl	%r14,do_pgm_check
+	larl	%r9,startup_pgm_check_handler
+	stg	%r9,__LC_PGM_NEW_PSW+8
+	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	lpswe	__LC_RETURN_PSW
 .Lold_psw_disabled_wait:
 	la	%r8,4095
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S
index f015469e7db9..f7107c76258c 100644
--- a/arch/s390/boot/head_kdump.S
+++ b/arch/s390/boot/head_kdump.S
@@ -82,12 +82,12 @@ SYM_CODE_START_LOCAL(startup_kdump)
 #
 # Startup of kdump (relocated new kernel)
 #
-.align 2
+	.balign	2
 startup_kdump_relocated:
 	basr	%r13,0
 0:	lpswe	.Lrestart_psw-0b(%r13)		# Start new kernel...
 SYM_CODE_END(startup_kdump)
-.align	8
+	.balign	8
 .Lrestart_psw:
 	.quad	0x0000000080000000,0x0000000000000000 + startup
 #else
@@ -95,7 +95,7 @@ SYM_CODE_START_LOCAL(startup_kdump)
 	larl	%r13,startup_kdump_crash
 	lpswe	0(%r13)
 SYM_CODE_END(startup_kdump)
-.align 8
+	.balign	8
 startup_kdump_crash:
 	.quad	0x0002000080000000,0x0000000000000000 + startup_kdump_crash
 #endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index 616ba1660f08..fa41486258ee 100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -15,10 +15,12 @@
 #   $3 - kernel map file
 #   $4 - default install path (blank if root directory)
 
+set -e
+
 echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
      "bootloader config required" >&2
-if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
-if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
+if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
+if [ -f "$4/System.map-$1" ]; then mv -- "$4/System.map-$1" "$4/System.map-$1.old"; fi
 
-cat $2 > $4/vmlinuz-$1
-cp $3 $4/System.map-$1
+cat -- "$2" > "$4/vmlinuz-$1"
+cp -- "$3" "$4/System.map-$1"
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index ca78d6162245..f584d7da29cb 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,9 @@
 #include <linux/init.h>
 #include <linux/ctype.h>
 #include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+#include <asm/page-states.h>
+#include <asm/machine.h>
 #include <asm/ebcdic.h>
 #include <asm/sclp.h>
 #include <asm/sections.h>
@@ -19,42 +22,27 @@ struct parmarea parmarea __section(".parmarea") = {
 };
 
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-int __bootdata(noexec_disabled);
 
 unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
 struct ipl_parameter_block __bootdata_preserved(ipl_block);
 int __bootdata_preserved(ipl_block_valid);
+int __bootdata_preserved(__kaslr_enabled);
+int __bootdata_preserved(cmma_flag) = 1;
 
 unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
 unsigned long memory_limit;
 int vmalloc_size_set;
-int kaslr_enabled;
 
 static inline int __diag308(unsigned long subcode, void *addr)
 {
-	unsigned long reg1, reg2;
-	union register_pair r1;
-	psw_t old;
-
-	r1.even = (unsigned long) addr;
-	r1.odd	= 0;
-	asm volatile(
-		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
-		"	epsw	%[reg1],%[reg2]\n"
-		"	st	%[reg1],0(%[psw_pgm])\n"
-		"	st	%[reg2],4(%[psw_pgm])\n"
-		"	larl	%[reg1],1f\n"
-		"	stg	%[reg1],8(%[psw_pgm])\n"
+	union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 };
+
+	asm_inline volatile(
 		"	diag	%[r1],%[subcode],0x308\n"
-		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
-		: [r1] "+&d" (r1.pair),
-		  [reg1] "=&d" (reg1),
-		  [reg2] "=&a" (reg2),
-		  "+Q" (S390_lowcore.program_new_psw),
-		  "=Q" (old)
-		: [subcode] "d" (subcode),
-		  [psw_old] "a" (&old),
-		  [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+		"0:\n"
+		EX_TABLE(0b, 0b)
+		: [r1] "+d" (r1.pair)
+		: [subcode] "d" (subcode)
 		: "cc", "memory");
 	return r1.odd;
 }
@@ -77,6 +65,9 @@ bool is_ipl_block_dump(void)
 	if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
 	    ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
 		return true;
+	if (ipl_block.pb0_hdr.pbt == IPL_PBT_ECKD &&
+	    ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP)
+		return true;
 	return false;
 }
 
@@ -108,6 +99,11 @@ static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
 		scp_data_len = ipb->nvme.scp_data_len;
 		scp_data = ipb->nvme.scp_data;
 		break;
+	case IPL_PBT_ECKD:
+		scp_data_len = ipb->eckd.scp_data_len;
+		scp_data = ipb->eckd.scp_data;
+		break;
+
 	default:
 		goto out;
 	}
@@ -153,6 +149,7 @@ static void append_ipl_block_parm(void)
 		break;
 	case IPL_PBT_FCP:
 	case IPL_PBT_NVME:
+	case IPL_PBT_ECKD:
 		rc = ipl_block_get_ascii_scpdata(
 			parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
 		break;
@@ -182,7 +179,7 @@ void setup_boot_command_line(void)
 	if (has_ebcdic_char(parmarea.command_line))
 		EBCASC(parmarea.command_line, COMMAND_LINE_SIZE);
 	/* copy arch command line */
-	strcpy(early_command_line, strim(parmarea.command_line));
+	strscpy(early_command_line, strim(parmarea.command_line));
 
 	/* append IPL PARM data to the boot command line */
 	if (!is_prot_virt_guest() && ipl_block_valid)
@@ -204,7 +201,7 @@ static void check_cleared_facilities(void)
 
 	for (i = 0; i < ARRAY_SIZE(als); i++) {
 		if ((stfle_fac_list[i] & als[i]) != als[i]) {
-			sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+			boot_emerg("The Linux kernel requires facilities cleared via command line option\n");
 			print_missing_facilities();
 			break;
 		}
@@ -255,8 +252,9 @@ void parse_boot_command_line(void)
 	char *args;
 	int rc;
 
-	kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
-	args = strcpy(command_line_buf, early_command_line);
+	__kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
+	strscpy(command_line_buf, early_command_line);
+	args = command_line_buf;
 	while (*args) {
 		args = next_arg(args, &param, &val);
 
@@ -264,7 +262,7 @@ void parse_boot_command_line(void)
 			memory_limit = round_down(memparse(val, NULL), PAGE_SIZE);
 
 		if (!strcmp(param, "vmalloc") && val) {
-			vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
+			vmalloc_size = round_up(memparse(val, NULL), _SEGMENT_SIZE);
 			vmalloc_size_set = 1;
 		}
 
@@ -281,17 +279,20 @@ void parse_boot_command_line(void)
 				zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
 		}
 
-		if (!strcmp(param, "noexec")) {
-			rc = kstrtobool(val, &enabled);
-			if (!rc && !enabled)
-				noexec_disabled = 1;
-		}
-
 		if (!strcmp(param, "facilities") && val)
 			modify_fac_list(val);
 
+		if (!strcmp(param, "debug-alternative"))
+			alt_debug_setup(val);
+
 		if (!strcmp(param, "nokaslr"))
-			kaslr_enabled = 0;
+			__kaslr_enabled = 0;
+
+		if (!strcmp(param, "cmma")) {
+			rc = kstrtobool(val, &enabled);
+			if (!rc && !enabled)
+				cmma_flag = 0;
+		}
 
 #if IS_ENABLED(CONFIG_KVM)
 		if (!strcmp(param, "prot_virt")) {
@@ -300,5 +301,26 @@ void parse_boot_command_line(void)
 				prot_virt_host = 1;
 		}
 #endif
+		if (!strcmp(param, "relocate_lowcore") && test_facility(193))
+			set_machine_feature(MFEATURE_LOWCORE);
+		if (!strcmp(param, "earlyprintk"))
+			boot_earlyprintk = true;
+		if (!strcmp(param, "debug"))
+			boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+		if (!strcmp(param, "bootdebug")) {
+			bootdebug = true;
+			if (val)
+				strscpy(bootdebug_filter, val);
+		}
+		if (!strcmp(param, "quiet"))
+			boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+		if (!strcmp(param, "ignore_loglevel"))
+			boot_ignore_loglevel = true;
+		/* val is NULL when "loglevel" is given without "=<n>": nothing to parse */
+		if (!strcmp(param, "loglevel") && val) {
+			boot_console_loglevel = simple_strtoull(val, NULL, 10);
+			if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN)
+				boot_console_loglevel = CONSOLE_LOGLEVEL_MIN;
+		}
 	}
 }
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 9b14045065b6..f73cd757a5f7 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -5,6 +5,7 @@
 #include <asm/sclp.h>
 #include <asm/sections.h>
 #include <asm/boot_data.h>
+#include <asm/physmem_info.h>
 #include <uapi/asm/ipl.h>
 #include "boot.h"
 
@@ -16,24 +17,19 @@ unsigned long __bootdata_preserved(ipl_cert_list_size);
 unsigned long __bootdata(early_ipl_comp_list_addr);
 unsigned long __bootdata(early_ipl_comp_list_size);
 
+static struct ipl_rb_certificates *certs;
+static struct ipl_rb_components *comps;
+static bool ipl_report_needs_saving;
+
 #define for_each_rb_entry(entry, rb) \
 	for (entry = rb->entries; \
 	     (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
 	     entry++)
 
-static inline bool intersects(unsigned long addr0, unsigned long size0,
-			      unsigned long addr1, unsigned long size1)
-{
-	return addr0 + size0 > addr1 && addr1 + size1 > addr0;
-}
-
-static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
-					 struct ipl_rb_certificates *certs,
-					 unsigned long safe_addr)
+static unsigned long get_cert_comp_list_size(void)
 {
 	struct ipl_rb_certificate_entry *cert;
 	struct ipl_rb_component_entry *comp;
-	size_t size;
 
 	/*
 	 * Find the length for the IPL report boot data
@@ -44,36 +40,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
 	ipl_cert_list_size = 0;
 	for_each_rb_entry(cert, certs)
 		ipl_cert_list_size += sizeof(unsigned int) + cert->len;
-	size = ipl_cert_list_size + early_ipl_comp_list_size;
+	return ipl_cert_list_size + early_ipl_comp_list_size;
+}
 
-	/*
-	 * Start from safe_addr to find a free memory area large
-	 * enough for the IPL report boot data. This area is used
-	 * for ipl_cert_list_addr/ipl_cert_list_size and
-	 * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
-	 * not overlap with any component or any certificate.
-	 */
-repeat:
-	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
-	    intersects(initrd_data.start, initrd_data.size, safe_addr, size))
-		safe_addr = initrd_data.start + initrd_data.size;
-	for_each_rb_entry(comp, comps)
-		if (intersects(safe_addr, size, comp->addr, comp->len)) {
-			safe_addr = comp->addr + comp->len;
-			goto repeat;
-		}
-	for_each_rb_entry(cert, certs)
-		if (intersects(safe_addr, size, cert->addr, cert->len)) {
-			safe_addr = cert->addr + cert->len;
-			goto repeat;
-		}
-	early_ipl_comp_list_addr = safe_addr;
-	ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size;
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+				 unsigned long *intersection_start)
+{
+	struct ipl_rb_certificate_entry *cert;
 
-	return safe_addr + size;
+	if (!ipl_report_needs_saving)
+		return false;
+
+	for_each_rb_entry(cert, certs) {
+		if (intersects(addr, size, cert->addr, cert->len)) {
+			*intersection_start = cert->addr;
+			return true;
+		}
+	}
+	return false;
 }
 
-static void copy_components_bootdata(struct ipl_rb_components *comps)
+static void copy_components_bootdata(void)
 {
 	struct ipl_rb_component_entry *comp, *ptr;
 
@@ -82,7 +69,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps)
 		memcpy(ptr++, comp, sizeof(*ptr));
 }
 
-static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
+static void copy_certificates_bootdata(void)
 {
 	struct ipl_rb_certificate_entry *cert;
 	void *ptr;
@@ -96,10 +83,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
 	}
 }
 
-unsigned long read_ipl_report(unsigned long safe_addr)
+int read_ipl_report(void)
 {
-	struct ipl_rb_certificates *certs;
-	struct ipl_rb_components *comps;
 	struct ipl_pl_hdr *pl_hdr;
 	struct ipl_rl_hdr *rl_hdr;
 	struct ipl_rb_hdr *rb_hdr;
@@ -112,7 +97,7 @@ unsigned long read_ipl_report(unsigned long safe_addr)
 	 */
 	if (!ipl_block_valid ||
 	    !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
-		return safe_addr;
+		return -1;
 	ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
 	/*
 	 * There is an IPL report, to find it load the pointer to the
@@ -120,7 +105,7 @@ unsigned long read_ipl_report(unsigned long safe_addr)
 	 * the IPL parameter list, then align the address to a double
 	 * word boundary.
 	 */
-	tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+	tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr;
 	pl_hdr = (struct ipl_pl_hdr *) tmp;
 	tmp = (tmp + pl_hdr->len + 7) & -8UL;
 	rl_hdr = (struct ipl_rl_hdr *) tmp;
@@ -150,16 +135,30 @@ unsigned long read_ipl_report(unsigned long safe_addr)
 	 * With either the component list or the certificate list
 	 * missing the kernel will stay ignorant of secure IPL.
 	 */
-	if (!comps || !certs)
-		return safe_addr;
+	if (!comps || !certs) {
+		certs = NULL;
+		return -1;
+	}
 
-	/*
-	 * Copy component and certificate list to a safe area
-	 * where the decompressed kernel can find them.
-	 */
-	safe_addr = find_bootdata_space(comps, certs, safe_addr);
-	copy_components_bootdata(comps);
-	copy_certificates_bootdata(certs);
+	ipl_report_needs_saving = true;
+	physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr,
+			(unsigned long)rl_end - (unsigned long)pl_hdr);
+	return 0;
+}
+
+void save_ipl_cert_comp_list(void)
+{
+	unsigned long size;
+
+	if (!ipl_report_needs_saving)
+		return;
+
+	size = get_cert_comp_list_size();
+	early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int));
+	ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
 
-	return safe_addr;
+	copy_components_bootdata();
+	copy_certificates_bootdata();
+	physmem_free(RR_IPLREPORT);
+	ipl_report_needs_saving = false;
 }
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index e8d74d4f62aa..941f4c9e27cc 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -3,7 +3,7 @@
  * Copyright IBM Corp. 2019
  */
 #include <linux/pgtable.h>
-#include <asm/mem_detect.h>
+#include <asm/physmem_info.h>
 #include <asm/cpacf.h>
 #include <asm/timex.h>
 #include <asm/sclp.h>
@@ -32,7 +32,7 @@ struct prng_parm {
 static int check_prng(void)
 {
 	if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
-		sclp_early_printk("KASLR disabled: CPU has no PRNG\n");
+		boot_warn("KASLR disabled: CPU has no PRNG\n");
 		return 0;
 	}
 	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -43,7 +43,7 @@ static int check_prng(void)
 		return PRNG_MODE_TDES;
 }
 
-static int get_random(unsigned long limit, unsigned long *value)
+int get_random(unsigned long limit, unsigned long *value)
 {
 	struct prng_parm prng = {
 		/* initial parameter block for tdes mode, copied from libica */
@@ -91,119 +91,108 @@ static int get_random(unsigned long limit, unsigned long *value)
 	return 0;
 }
 
-/*
- * To randomize kernel base address we have to consider several facts:
- * 1. physical online memory might not be continuous and have holes. mem_detect
- *    info contains list of online memory ranges we should consider.
- * 2. we have several memory regions which are occupied and we should not
- *    overlap and destroy them. Currently safe_addr tells us the border below
- *    which all those occupied regions are. We are safe to use anything above
- *    safe_addr.
- * 3. the upper limit might apply as well, even if memory above that limit is
- *    online. Currently those limitations are:
- *    3.1. Limit set by "mem=" kernel command line option
- *    3.2. memory reserved at the end for kasan initialization.
- * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
- *    Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
- *    (16 pages when the kernel is built with kasan enabled)
- * Assumptions:
- * 1. kernel size (including .bss size) and upper memory limit are page aligned.
- * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE
- *    aligned (in practice memory configurations granularity on z/VM and LPAR
- *    is 1mb).
- *
- * To guarantee uniform distribution of kernel base address among all suitable
- * addresses we generate random value just once. For that we need to build a
- * continuous range in which every value would be suitable. We can build this
- * range by simply counting all suitable addresses (let's call them positions)
- * which would be valid as kernel base address. To count positions we iterate
- * over online memory ranges. For each range which is big enough for the
- * kernel image we count all suitable addresses we can put the kernel image at
- * that is
- * (end - start - kernel_size) / THREAD_SIZE + 1
- * Two functions count_valid_kernel_positions and position_to_address help
- * to count positions in memory range given and then convert position back
- * to address.
- */
-static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
-						  unsigned long _min,
-						  unsigned long _max)
+static void sort_reserved_ranges(struct reserved_range *res, unsigned long size)
 {
-	unsigned long start, end, pos = 0;
-	int i;
-
-	for_each_mem_detect_block(i, &start, &end) {
-		if (_min >= end)
-			continue;
-		if (start >= _max)
-			break;
-		start = max(_min, start);
-		end = min(_max, end);
-		if (end - start < kernel_size)
-			continue;
-		pos += (end - start - kernel_size) / THREAD_SIZE + 1;
+	struct reserved_range tmp;
+	int i, j;
+
+	for (i = 1; i < size; i++) {
+		tmp = res[i];
+		for (j = i - 1; j >= 0 && res[j].start > tmp.start; j--)
+			res[j + 1] = res[j];
+		res[j + 1] = tmp;
 	}
-
-	return pos;
 }
 
-static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
-				 unsigned long _min, unsigned long _max)
+static unsigned long iterate_valid_positions(unsigned long size, unsigned long align,
+					     unsigned long _min, unsigned long _max,
+					     struct reserved_range *res, size_t res_count,
+					     bool pos_count, unsigned long find_pos)
 {
-	unsigned long start, end;
+	unsigned long start, end, tmp_end, range_pos, pos = 0;
+	struct reserved_range *res_end = res + res_count;
+	struct reserved_range *skip_res;
 	int i;
 
-	for_each_mem_detect_block(i, &start, &end) {
+	align = max(align, 8UL);
+	_min = round_up(_min, align);
+	for_each_physmem_usable_range(i, &start, &end) {
 		if (_min >= end)
 			continue;
+		start = round_up(start, align);
 		if (start >= _max)
 			break;
 		start = max(_min, start);
 		end = min(_max, end);
-		if (end - start < kernel_size)
-			continue;
-		if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
-			return start + (pos - 1) * THREAD_SIZE;
-		pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
+
+		while (start + size <= end) {
+			/* skip reserved ranges below the start */
+			while (res && res->end <= start) {
+				res++;
+				if (res >= res_end)
+					res = NULL;
+			}
+			skip_res = NULL;
+			tmp_end = end;
+			/* has intersecting reserved range */
+			if (res && res->start < end) {
+				skip_res = res;
+				tmp_end = res->start;
+			}
+			if (start + size <= tmp_end) {
+				range_pos = (tmp_end - start - size) / align + 1;
+				if (pos_count) {
+					pos += range_pos;
+				} else {
+					if (range_pos >= find_pos)
+						return start + (find_pos - 1) * align;
+					find_pos -= range_pos;
+				}
+			}
+			if (!skip_res)
+				break;
+			start = round_up(skip_res->end, align);
+		}
 	}
 
-	return 0;
+	return pos_count ? pos : 0;
 }
 
-unsigned long get_random_base(unsigned long safe_addr)
+/*
+ * Two types of decompressor memory allocations/reserves are considered
+ * differently.
+ *
+ * "Static" or "single" allocations are done via physmem_alloc_range() and
+ * physmem_reserve(), and they are listed in physmem_info.reserved[]. Each
+ * type of "static" allocation can only have one allocation per type and
+ * cannot have chains.
+ *
+ * On the other hand, "dynamic" or "repetitive" allocations are done via
+ * physmem_alloc_or_die(). These allocations are tightly packed together
+ * top down from the end of online memory. physmem_alloc_pos represents
+ * current position where those allocations start.
+ *
+ * Functions randomize_within_range() and iterate_valid_positions()
+ * only consider "dynamic" allocations by never looking above
+ * physmem_alloc_pos. "Static" allocations, however, are explicitly
+ * considered by checking the "res" (reserves) array. The first
+ * reserved_range of a "dynamic" allocation may also be checked along the
+ * way, but it will always be above the maximum value anyway.
+ */
+unsigned long randomize_within_range(unsigned long size, unsigned long align,
+				     unsigned long min, unsigned long max)
 {
-	unsigned long memory_limit = get_mem_detect_end();
-	unsigned long base_pos, max_pos, kernel_size;
-	unsigned long kasan_needs;
-	int i;
+	struct reserved_range res[RR_MAX];
+	unsigned long max_pos, pos;
 
-	memory_limit = min(memory_limit, ident_map_size);
+	memcpy(res, physmem_info.reserved, sizeof(res));
+	sort_reserved_ranges(res, ARRAY_SIZE(res));
+	max = min(max, get_physmem_alloc_pos());
 
-	/*
-	 * Avoid putting kernel in the end of physical memory
-	 * which kasan will use for shadow memory and early pgtable
-	 * mapping allocations.
-	 */
-	memory_limit -= kasan_estimate_memory_needs(memory_limit);
-
-	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) {
-		if (safe_addr < initrd_data.start + initrd_data.size)
-			safe_addr = initrd_data.start + initrd_data.size;
-	}
-	safe_addr = ALIGN(safe_addr, THREAD_SIZE);
-
-	kernel_size = vmlinux.image_size + vmlinux.bss_size;
-	if (safe_addr + kernel_size > memory_limit)
+	max_pos = iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), true, 0);
+	if (!max_pos)
 		return 0;
-
-	max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
-	if (!max_pos) {
-		sclp_early_printk("KASLR disabled: not enough memory\n");
-		return 0;
-	}
-
-	/* we need a value in the range [1, base_pos] inclusive */
-	if (get_random(max_pos, &base_pos))
+	if (get_random(max_pos, &pos))
 		return 0;
-	return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
+	return iterate_valid_positions(size, align, min, max, res, ARRAY_SIZE(res), false, pos + 1);
 }
diff --git a/arch/s390/boot/kmsan.c b/arch/s390/boot/kmsan.c
new file mode 100644
index 000000000000..e7b3ac48143e
--- /dev/null
+++ b/arch/s390/boot/kmsan.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kmsan-checks.h>
+
+void kmsan_unpoison_memory(const void *address, size_t size)
+{
+}
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
deleted file mode 100644
index 7fa1a32ea0f3..000000000000
--- a/arch/s390/boot/mem_detect.c
+++ /dev/null
@@ -1,191 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <asm/setup.h>
-#include <asm/processor.h>
-#include <asm/sclp.h>
-#include <asm/sections.h>
-#include <asm/mem_detect.h>
-#include <asm/sparsemem.h>
-#include "decompressor.h"
-#include "boot.h"
-
-struct mem_detect_info __bootdata(mem_detect);
-
-/* up to 256 storage elements, 1020 subincrements each */
-#define ENTRIES_EXTENDED_MAX						       \
-	(256 * (1020 / 2) * sizeof(struct mem_detect_block))
-
-/*
- * To avoid corrupting old kernel memory during dump, find lowest memory
- * chunk possible either right after the kernel end (decompressed kernel) or
- * after initrd (if it is present and there is no hole between the kernel end
- * and initrd)
- */
-static void *mem_detect_alloc_extended(void)
-{
-	unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
-
-	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
-	    initrd_data.start < offset + ENTRIES_EXTENDED_MAX)
-		offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64));
-
-	return (void *)offset;
-}
-
-static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n)
-{
-	if (n < MEM_INLINED_ENTRIES)
-		return &mem_detect.entries[n];
-	if (unlikely(!mem_detect.entries_extended))
-		mem_detect.entries_extended = mem_detect_alloc_extended();
-	return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES];
-}
-
-/*
- * sequential calls to add_mem_detect_block with adjacent memory areas
- * are merged together into single memory block.
- */
-void add_mem_detect_block(u64 start, u64 end)
-{
-	struct mem_detect_block *block;
-
-	if (mem_detect.count) {
-		block = __get_mem_detect_block_ptr(mem_detect.count - 1);
-		if (block->end == start) {
-			block->end = end;
-			return;
-		}
-	}
-
-	block = __get_mem_detect_block_ptr(mem_detect.count);
-	block->start = start;
-	block->end = end;
-	mem_detect.count++;
-}
-
-static int __diag260(unsigned long rx1, unsigned long rx2)
-{
-	unsigned long reg1, reg2, ry;
-	union register_pair rx;
-	psw_t old;
-	int rc;
-
-	rx.even = rx1;
-	rx.odd	= rx2;
-	ry = 0x10; /* storage configuration */
-	rc = -1;   /* fail */
-	asm volatile(
-		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
-		"	epsw	%[reg1],%[reg2]\n"
-		"	st	%[reg1],0(%[psw_pgm])\n"
-		"	st	%[reg2],4(%[psw_pgm])\n"
-		"	larl	%[reg1],1f\n"
-		"	stg	%[reg1],8(%[psw_pgm])\n"
-		"	diag	%[rx],%[ry],0x260\n"
-		"	ipm	%[rc]\n"
-		"	srl	%[rc],28\n"
-		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
-		: [reg1] "=&d" (reg1),
-		  [reg2] "=&a" (reg2),
-		  [rc] "+&d" (rc),
-		  [ry] "+&d" (ry),
-		  "+Q" (S390_lowcore.program_new_psw),
-		  "=Q" (old)
-		: [rx] "d" (rx.pair),
-		  [psw_old] "a" (&old),
-		  [psw_pgm] "a" (&S390_lowcore.program_new_psw)
-		: "cc", "memory");
-	return rc == 0 ? ry : -1;
-}
-
-static int diag260(void)
-{
-	int rc, i;
-
-	struct {
-		unsigned long start;
-		unsigned long end;
-	} storage_extents[8] __aligned(16); /* VM supports up to 8 extends */
-
-	memset(storage_extents, 0, sizeof(storage_extents));
-	rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
-	if (rc == -1)
-		return -1;
-
-	for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++)
-		add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1);
-	return 0;
-}
-
-static int tprot(unsigned long addr)
-{
-	unsigned long reg1, reg2;
-	int rc = -EFAULT;
-	psw_t old;
-
-	asm volatile(
-		"	mvc	0(16,%[psw_old]),0(%[psw_pgm])\n"
-		"	epsw	%[reg1],%[reg2]\n"
-		"	st	%[reg1],0(%[psw_pgm])\n"
-		"	st	%[reg2],4(%[psw_pgm])\n"
-		"	larl	%[reg1],1f\n"
-		"	stg	%[reg1],8(%[psw_pgm])\n"
-		"	tprot	0(%[addr]),0\n"
-		"	ipm	%[rc]\n"
-		"	srl	%[rc],28\n"
-		"1:	mvc	0(16,%[psw_pgm]),0(%[psw_old])\n"
-		: [reg1] "=&d" (reg1),
-		  [reg2] "=&a" (reg2),
-		  [rc] "+&d" (rc),
-		  "=Q" (S390_lowcore.program_new_psw.addr),
-		  "=Q" (old)
-		: [psw_old] "a" (&old),
-		  [psw_pgm] "a" (&S390_lowcore.program_new_psw),
-		  [addr] "a" (addr)
-		: "cc", "memory");
-	return rc;
-}
-
-static void search_mem_end(void)
-{
-	unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
-	unsigned long offset = 0;
-	unsigned long pivot;
-
-	while (range > 1) {
-		range >>= 1;
-		pivot = offset + range;
-		if (!tprot(pivot << 20))
-			offset = pivot;
-	}
-
-	add_mem_detect_block(0, (offset + 1) << 20);
-}
-
-unsigned long detect_memory(void)
-{
-	unsigned long max_physmem_end;
-
-	sclp_early_get_memsize(&max_physmem_end);
-
-	if (!sclp_early_read_storage_info()) {
-		mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
-		return max_physmem_end;
-	}
-
-	if (!diag260()) {
-		mem_detect.info_source = MEM_DETECT_DIAG260;
-		return max_physmem_end;
-	}
-
-	if (max_physmem_end) {
-		add_mem_detect_block(0, max_physmem_end);
-		mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
-		return max_physmem_end;
-	}
-
-	search_mem_end();
-	mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
-	return get_mem_detect_end();
-}
diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c
new file mode 100644
index 000000000000..fa621fa5bc02
--- /dev/null
+++ b/arch/s390/boot/pgm_check.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+void print_stacktrace(unsigned long sp)
+{
+	struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
+					 (unsigned long)_stack_end };
+	bool top = true;
+
+	boot_emerg("Call Trace:\n");
+	/* Follow the back chain while sp is 8-byte aligned and within boot_stack */
+	for (; !(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame)); top = false) {
+		struct stack_frame *frame = (struct stack_frame *)sp;
+
+		if (!top)
+			boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, frame->gprs[8], (void *)frame->gprs[8]);
+		else
+			boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, frame->gprs[8], (void *)frame->gprs[8]);
+		if (frame->back_chain <= sp)
+			break;
+		sp = frame->back_chain;
+	}
+}
+
+extern struct exception_table_entry __start___ex_table[], __stop___ex_table[];
+
+/* The insn field holds an offset relative to its own address */
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn;
+}
+
+/* Point the PSW at the fixup if the first entry matching psw.addr is EX_TYPE_FIXUP */
+static bool ex_handler(struct pt_regs *regs)
+{
+	const struct exception_table_entry *entry = __start___ex_table;
+
+	for (; entry < __stop___ex_table; entry++) {
+		if (extable_insn(entry) == regs->psw.addr)
+			break;
+	}
+	if (entry >= __stop___ex_table || entry->type != EX_TYPE_FIXUP)
+		return false;
+	regs->psw.addr = extable_fixup(entry);
+	return true;
+}
+
+void do_pgm_check(struct pt_regs *regs)
+{
+	struct psw_bits *psw = &psw_bits(regs->psw);
+	unsigned long *gpregs = regs->gprs;
+
+	if (ex_handler(regs))
+		return;	/* ex_handler() pointed regs->psw.addr at a fixup */
+	if (bootdebug)
+		boot_rb_dump();
+	boot_emerg("Linux version %s\n", kernel_version);
+	if (!is_prot_virt_guest() && early_command_line[0])
+		boot_emerg("Kernel command line: %s\n", early_command_line);
+	boot_emerg("Kernel fault: interruption code %04x ilc:%d\n",
+		   regs->int_code & 0xffff, regs->int_code >> 17);
+	if (kaslr_enabled()) {
+		boot_emerg("Kernel random base: %lx\n", __kaslr_offset);
+		boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+	}
+	boot_emerg("PSW : %016lx %016lx (%pS)\n",
+		   regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr);
+	boot_emerg("      R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+		   psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+		   psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba);
+	boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+	boot_emerg("      %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+	boot_emerg("      %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+	boot_emerg("      %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+	print_stacktrace(gpregs[15]);
+	boot_emerg("Last Breaking-Event-Address:\n");
+	boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break);
+	/* Convert to disabled wait PSW: clear the io and ext bits, set the wait bit */
+	psw->io = 0;
+	psw->ext = 0;
+	psw->wait = 1;
+}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
deleted file mode 100644
index c2a1defc79da..000000000000
--- a/arch/s390/boot/pgm_check_info.c
+++ /dev/null
@@ -1,180 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/stdarg.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <asm/stacktrace.h>
-#include <asm/boot_data.h>
-#include <asm/lowcore.h>
-#include <asm/setup.h>
-#include <asm/sclp.h>
-#include <asm/uv.h>
-#include "boot.h"
-
-const char hex_asc[] = "0123456789abcdef";
-
-static char *as_hex(char *dst, unsigned long val, int pad)
-{
-	char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
-
-	for (*p-- = 0; p >= dst; val >>= 4)
-		*p-- = hex_asc[val & 0x0f];
-	return end;
-}
-
-static char *symstart(char *p)
-{
-	while (*p)
-		p--;
-	return p + 1;
-}
-
-static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
-{
-	/* symbol entries are in a form "10000 c4 startup\0" */
-	char *a = _decompressor_syms_start;
-	char *b = _decompressor_syms_end;
-	unsigned long start;
-	unsigned long size;
-	char *pivot;
-	char *endp;
-
-	while (a < b) {
-		pivot = symstart(a + (b - a) / 2);
-		start = simple_strtoull(pivot, &endp, 16);
-		size = simple_strtoull(endp + 1, &endp, 16);
-		if (ip < start) {
-			b = pivot;
-			continue;
-		}
-		if (ip > start + size) {
-			a = pivot + strlen(pivot) + 1;
-			continue;
-		}
-		*off = ip - start;
-		*len = size;
-		return endp + 1;
-	}
-	return NULL;
-}
-
-static noinline char *strsym(void *ip)
-{
-	static char buf[64];
-	unsigned short off;
-	unsigned short len;
-	char *p;
-
-	p = findsym((unsigned long)ip, &off, &len);
-	if (p) {
-		strncpy(buf, p, sizeof(buf));
-		/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
-		p = buf + strnlen(buf, sizeof(buf) - 15);
-		strcpy(p, "+0x");
-		p = as_hex(p + 3, off, 0);
-		strcpy(p, "/0x");
-		as_hex(p + 3, len, 0);
-	} else {
-		as_hex(buf, (unsigned long)ip, 16);
-	}
-	return buf;
-}
-
-void decompressor_printk(const char *fmt, ...)
-{
-	char buf[1024] = { 0 };
-	char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
-	unsigned long pad;
-	char *p = buf;
-	va_list args;
-
-	va_start(args, fmt);
-	for (; p < end && *fmt; fmt++) {
-		if (*fmt != '%') {
-			*p++ = *fmt;
-			continue;
-		}
-		pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
-		switch (*fmt) {
-		case 's':
-			p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
-			break;
-		case 'p':
-			if (*++fmt != 'S')
-				goto out;
-			p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
-			break;
-		case 'l':
-			if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
-				goto out;
-			p = as_hex(p, va_arg(args, unsigned long), pad);
-			break;
-		case 'x':
-			if (end - p <= max(sizeof(int) * 2, pad))
-				goto out;
-			p = as_hex(p, va_arg(args, unsigned int), pad);
-			break;
-		default:
-			goto out;
-		}
-	}
-out:
-	va_end(args);
-	sclp_early_printk(buf);
-}
-
-static noinline void print_stacktrace(void)
-{
-	struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
-					 (unsigned long)_stack_end };
-	unsigned long sp = S390_lowcore.gpregs_save_area[15];
-	bool first = true;
-
-	decompressor_printk("Call Trace:\n");
-	while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
-		struct stack_frame *sf = (struct stack_frame *)sp;
-
-		decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
-					    " sp:%016lx [<%016lx>] %pS\n",
-				    sp, sf->gprs[8], (void *)sf->gprs[8]);
-		if (sf->back_chain <= sp)
-			break;
-		sp = sf->back_chain;
-		first = false;
-	}
-}
-
-void print_pgm_check_info(void)
-{
-	unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area;
-	struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area);
-
-	decompressor_printk("Linux version %s\n", kernel_version);
-	if (!is_prot_virt_guest() && early_command_line[0])
-		decompressor_printk("Kernel command line: %s\n", early_command_line);
-	decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
-			    S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
-	if (kaslr_enabled)
-		decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
-	decompressor_printk("PSW : %016lx %016lx (%pS)\n",
-			    S390_lowcore.psw_save_area.mask,
-			    S390_lowcore.psw_save_area.addr,
-			    (void *)S390_lowcore.psw_save_area.addr);
-	decompressor_printk(
-		"      R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
-		psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
-		psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
-		psw->eaba);
-	decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n",
-			    gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
-	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
-			    gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
-	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
-			    gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
-	decompressor_printk("      %016lx %016lx %016lx %016lx\n",
-			    gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
-	print_stacktrace();
-	decompressor_printk("Last Breaking-Event-Address:\n");
-	decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
-			    (void *)S390_lowcore.pgm_last_break);
-}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
new file mode 100644
index 000000000000..45e3d057cfaa
--- /dev/null
+++ b/arch/s390/boot/physmem_info.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "physmem: " fmt
+#include <linux/processor.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/physmem_info.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/sparsemem.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/asm.h>
+#include <asm/uv.h>
+#include "decompressor.h"
+#include "boot.h"
+
+struct physmem_info __bootdata(physmem_info);
+static unsigned int physmem_alloc_ranges;
+static unsigned long physmem_alloc_pos;
+
+/* up to 256 storage elements, 1020 subincrements each */
+#define ENTRIES_EXTENDED_MAX						       \
+	(256 * (1020 / 2) * sizeof(struct physmem_range))
+
+static struct physmem_range *__get_physmem_range_ptr(u32 n)
+{
+	if (n < MEM_INLINED_ENTRIES)
+		return &physmem_info.online[n];
+	if (unlikely(!physmem_info.online_extended)) {
+		physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
+			RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+			physmem_alloc_pos, true);
+	}
+	return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
+}
+
+/*
+ * Sequential calls to add_physmem_online_range() with adjacent memory ranges
+ * (new start == previous end) are merged into a single memory range.
+ */
+void add_physmem_online_range(u64 start, u64 end)
+{
+	struct physmem_range *range;
+
+	if (physmem_info.range_count) {
+		range = __get_physmem_range_ptr(physmem_info.range_count - 1);
+		if (range->end == start) {
+			range->end = end;
+			return;
+		}
+	}
+
+	range = __get_physmem_range_ptr(physmem_info.range_count);
+	range->start = start;
+	range->end = end;
+	physmem_info.range_count++;
+}
+
+static int __diag260(unsigned long rx1, unsigned long rx2)
+{
+	union register_pair rx;
+	int cc, exception;
+	unsigned long ry;
+
+	rx.even = rx1;
+	rx.odd	= rx2;
+	ry = 0x10; /* storage configuration */
+	exception = 1;
+	asm_inline volatile(
+		"	diag	%[rx],%[ry],0x260\n"
+		"0:	lhi	%[exc],0\n"
+		"1:\n"
+		CC_IPM(cc)
+		EX_TABLE(0b, 1b)
+		: CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry)
+		: [rx] "d" (rx.pair)
+		: CC_CLOBBER_LIST("memory"));
+	cc = exception ? -1 : CC_TRANSFORM(cc);
+	return cc == 0 ? ry : -1;
+}
+
+static int diag260(void)
+{
+	int rc, i;
+
+	struct {
+		unsigned long start;
+		unsigned long end;
+	} storage_extents[8] __aligned(16); /* VM supports up to 8 extents */
+
+	memset(storage_extents, 0, sizeof(storage_extents));
+	rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents));
+	if (rc == -1) /* __diag260() reports an exception or non-zero cc as -1 */
+		return -1;
+
+	for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++)
+		add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1);
+	return 0;
+}
+
+#define DIAG500_SC_STOR_LIMIT 4
+
+static int diag500_storage_limit(unsigned long *max_physmem_end)
+{
+	unsigned long storage_limit;
+
+	asm_inline volatile(
+		"	lghi	%%r1,%[subcode]\n"
+		"	lghi	%%r2,0\n"
+		"	diag	%%r2,%%r4,0x500\n"
+		"0:	lgr	%[slimit],%%r2\n"
+		EX_TABLE(0b, 0b)
+		: [slimit] "=d" (storage_limit)
+		: [subcode] "i" (DIAG500_SC_STOR_LIMIT)
+		: "memory", "1", "2");
+	if (!storage_limit)
+		return -EINVAL;
+	/* Convert inclusive end to exclusive end */
+	*max_physmem_end = storage_limit + 1;
+	return 0;
+}
+
+static int tprot(unsigned long addr)
+{
+	int cc, exception;
+
+	exception = 1;
+	asm_inline volatile(
+		"	tprot	0(%[addr]),0\n"
+		"0:	lhi	%[exc],0\n"
+		"1:\n"
+		CC_IPM(cc)
+		EX_TABLE(0b, 1b)
+		: CC_OUT(cc, cc), [exc] "+d" (exception)
+		: [addr] "a" (addr)
+		: CC_CLOBBER_LIST("memory"));
+	cc = exception ? -EFAULT : CC_TRANSFORM(cc);
+	return cc;
+}
+
+/* Binary search, in 1MB steps, for the end of the memory tprot() returns 0 for */
+static unsigned long search_mem_end(void)
+{
+	unsigned long range = 1UL << (MAX_PHYSMEM_BITS - 20); /* 1MB blocks; UL: no int overflow */
+	unsigned long offset = 0, pivot;
+
+	while (range > 1) {
+		range >>= 1;
+		pivot = offset + range;
+		if (!tprot(pivot << 20))
+			offset = pivot;
+	}
+	return (offset + 1) << 20;
+}
+
+unsigned long detect_max_physmem_end(void)
+{
+	unsigned long max_physmem_end = 0;
+
+	if (!diag500_storage_limit(&max_physmem_end)) {
+		physmem_info.info_source = MEM_DETECT_DIAG500_STOR_LIMIT;
+	} else if (!sclp_early_get_memsize(&max_physmem_end)) {
+		physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+	} else {
+		max_physmem_end = search_mem_end();
+		physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
+	}
+	boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end,
+		   get_physmem_info_source());
+	return max_physmem_end;
+}
+
+void detect_physmem_online_ranges(unsigned long max_physmem_end)
+{
+	unsigned long start, end;
+	int i;
+
+	if (!sclp_early_read_storage_info()) {
+		physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
+	} else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) {
+		unsigned long online_end;
+
+		if (!sclp_early_get_memsize(&online_end)) {
+			physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+			add_physmem_online_range(0, online_end);
+		}
+	} else if (!diag260()) {
+		physmem_info.info_source = MEM_DETECT_DIAG260;
+	} else if (max_physmem_end) {
+		add_physmem_online_range(0, max_physmem_end);
+	}
+	boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source());
+	for_each_physmem_online_range(i, &start, &end)
+		boot_debug(" online [%d]:   0x%016lx-0x%016lx\n", i, start, end);
+}
+
+void physmem_set_usable_limit(unsigned long limit)
+{
+	physmem_info.usable = limit;
+	physmem_alloc_pos = limit;
+	boot_debug("Usable memory limit: 0x%016lx\n", limit);
+}
+
+static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
+{
+	unsigned long start, end, total_mem = 0, total_reserved_mem = 0;
+	struct reserved_range *range;
+	enum reserved_range_type t;
+	int i;
+
+	boot_emerg("Linux version %s\n", kernel_version);
+	if (!is_prot_virt_guest() && early_command_line[0])
+		boot_emerg("Kernel command line: %s\n", early_command_line);
+	boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n",
+		   size, align, min, max);
+	boot_emerg("Reserved memory ranges:\n");
+	for_each_physmem_reserved_range(t, range, &start, &end) {
+		boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+		total_reserved_mem += end - start;
+	}
+	boot_emerg("Usable online memory ranges (info source: %s [%d]):\n",
+		   get_physmem_info_source(), physmem_info.info_source);
+	for_each_physmem_usable_range(i, &start, &end) {
+		boot_emerg("%016lx %016lx\n", start, end);
+		total_mem += end - start;
+	}
+	boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n",
+		   total_mem, total_reserved_mem,
+		   total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+	print_stacktrace(current_frame_address());
+	boot_emerg(" -- System halted\n");
+	disabled_wait();
+}
+
+static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+	physmem_info.reserved[type].start = addr;
+	physmem_info.reserved[type].end = addr + size;
+}
+
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+	_physmem_reserve(type, addr, size);
+	boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size,
+		   get_rr_type_name(type));
+}
+
+void physmem_free(enum reserved_range_type type)
+{
+	boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start,
+		   physmem_info.reserved[type].end, get_rr_type_name(type));
+	physmem_info.reserved[type].start = 0;
+	physmem_info.reserved[type].end = 0;
+}
+
+static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size,
+				       unsigned long *intersection_start)
+{
+	unsigned long res_addr, res_size;
+	int t;
+
+	for (t = 0; t < RR_MAX; t++) {
+		if (!get_physmem_reserved(t, &res_addr, &res_size))
+			continue;
+		if (intersects(addr, size, res_addr, res_size)) {
+			*intersection_start = res_addr;
+			return true;
+		}
+	}
+	return ipl_report_certs_intersects(addr, size, intersection_start);
+}
+
+static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align,
+					   unsigned long min, unsigned long max,
+					   unsigned int from_ranges, unsigned int *ranges_left,
+					   bool die_on_oom)
+{
+	unsigned int nranges = from_ranges ?: physmem_info.range_count;
+	unsigned long range_start, range_end;
+	unsigned long intersection_start;
+	unsigned long addr, pos = max;
+
+	align = max(align, 8UL);
+	while (nranges) {
+		__get_physmem_range(nranges - 1, &range_start, &range_end, false);
+		pos = min(range_end, pos);
+
+		if (round_up(min, align) + size > pos)
+			break;
+		addr = round_down(pos - size, align);
+		if (range_start > addr) {
+			nranges--;
+			continue;
+		}
+		if (__physmem_alloc_intersects(addr, size, &intersection_start)) {
+			pos = intersection_start;
+			continue;
+		}
+
+		if (ranges_left)
+			*ranges_left = nranges;
+		return addr;
+	}
+	if (die_on_oom)
+		die_oom(size, align, min, max);
+	return 0;
+}
+
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+				  unsigned long align, unsigned long min, unsigned long max,
+				  bool die_on_oom)
+{
+	unsigned long addr;
+
+	max = min(max, physmem_alloc_pos);
+	addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
+	if (addr)
+		_physmem_reserve(type, addr, size);
+	boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size,
+		   get_rr_type_name(type));
+	return addr;
+}
+
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+			    unsigned long align, bool die_on_oom)
+{
+	struct reserved_range *range = &physmem_info.reserved[type];
+	struct reserved_range *new_range = NULL;
+	unsigned int ranges_left;
+	unsigned long addr;
+
+	addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
+				     &ranges_left, die_on_oom);
+	if (!addr)
+		return 0;
+	/* if not a consecutive allocation of the same type or first allocation */
+	if (range->start != addr + size) {
+		if (range->end) {
+			addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0,
+						     physmem_alloc_pos, physmem_alloc_ranges,
+						     &ranges_left, true);
+			new_range = (struct reserved_range *)addr;
+			addr = __physmem_alloc_range(size, align, 0, addr, ranges_left,
+						     &ranges_left, die_on_oom);
+			if (!addr)
+				return 0;
+			*new_range = *range;
+			range->chain = new_range;
+		}
+		range->end = addr + size;
+	}
+	if (type != RR_VMEM) {
+		boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:",
+			   addr, addr + size, get_rr_type_name(type), align, !!new_range);
+	}
+	range->start = addr;
+	physmem_alloc_pos = addr;
+	physmem_alloc_ranges = ranges_left;
+	return addr;
+}
+
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+				   unsigned long align)
+{
+	return physmem_alloc(type, size, align, true);
+}
+
+unsigned long get_physmem_alloc_pos(void)
+{
+	return physmem_alloc_pos;
+}
+
+void dump_physmem_reserved(void)
+{
+	struct reserved_range *range;
+	enum reserved_range_type t;
+	unsigned long start, end;
+
+	boot_debug("Reserved memory ranges:\n");
+	for_each_physmem_reserved_range(t, range, &start, &end) {
+		if (end) {
+			boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n",
+				   get_rr_type_name(t), start, end, (unsigned long)range,
+				   (unsigned long)range->chain);
+		}
+	}
+}
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
new file mode 100644
index 000000000000..4bb6bc95704e
--- /dev/null
+++ b/arch/s390/boot/printk.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/sections.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/timex.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT;
+bool boot_ignore_loglevel;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+static void boot_rb_add(const char *str, size_t len)
+{
+	/* leave double '\0' in the end */
+	size_t avail = sizeof(boot_rb) - boot_rb_off - 1;
+
+	/* store strings separated by '\0' */
+	if (len + 1 > avail)
+		boot_rb_off = 0;
+	avail = sizeof(boot_rb) - boot_rb_off - 1;
+	strscpy(boot_rb + boot_rb_off, str, avail);
+	boot_rb_off += len + 1;
+}
+
+static void print_rb_entry(const char *str)
+{
+	sclp_early_printk(printk_skip_level(str));
+}
+
+static bool debug_messages_printed(void)
+{
+	return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG);
+}
+
+void boot_rb_dump(void)
+{
+	if (debug_messages_printed())
+		return;
+	sclp_early_printk("Boot messages ring buffer:\n");
+	boot_rb_foreach(print_rb_entry);
+}
+
+const char hex_asc[] = "0123456789abcdef";
+
+static char *as_hex(char *dst, unsigned long val, int pad)
+{
+	char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+
+	for (*p-- = '\0'; p >= dst; val >>= 4)
+		*p-- = hex_asc[val & 0x0f];
+	return dst;
+}
+
+#define MAX_NUMLEN 21
+static char *as_dec(char *buf, unsigned long val, bool is_signed)
+{
+	bool negative = false;
+	char *p = buf + MAX_NUMLEN; /* string is built backwards from here */
+
+	if (is_signed && (long)val < 0) {
+		val = (val == LONG_MIN ? LONG_MIN : -(long)val); /* never negate LONG_MIN */
+		negative = true;
+	}
+
+	*--p = '\0';
+	do {
+		*--p = '0' + (val % 10);
+		val /= 10;
+	} while (val);
+
+	if (negative)
+		*--p = '-';
+	return p; /* points at the first character inside buf, not at buf itself */
+}
+
+static ssize_t strpad(char *dst, size_t dst_size, const char *src,
+		      int _pad, bool zero_pad, bool decimal)
+{
+	ssize_t len = strlen(src), pad = _pad;
+	char *p = dst;
+
+	if (max(len, abs(pad)) >= dst_size)
+		return -E2BIG;
+
+	if (pad > len) {
+		if (decimal && zero_pad && *src == '-') {
+			*p++ = '-';
+			src++;
+			len--;
+			pad--;
+		}
+		memset(p, zero_pad ? '0' : ' ', pad - len);
+		p += pad - len;
+	}
+	memcpy(p, src, len);
+	p += len;
+	if (pad < 0 && -pad > len) {
+		memset(p, ' ', -pad - len);
+		p += -pad - len;
+	}
+	*p = '\0';
+	return p - dst;
+}
+
+static char *symstart(char *p)
+{
+	while (*p)
+		p--;
+	return p + 1;
+}
+
+static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
+{
+	/* symbol entries are in a form "10000 c4 startup\0" */
+	char *a = _decompressor_syms_start;
+	char *b = _decompressor_syms_end;
+	unsigned long start;
+	unsigned long size;
+	char *pivot;
+	char *endp;
+
+	while (a < b) {
+		pivot = symstart(a + (b - a) / 2);
+		start = simple_strtoull(pivot, &endp, 16);
+		size = simple_strtoull(endp + 1, &endp, 16);
+		if (ip < start) {
+			b = pivot;
+			continue;
+		}
+		if (ip > start + size) {
+			a = pivot + strlen(pivot) + 1;
+			continue;
+		}
+		*off = ip - start;
+		*len = size;
+		return endp + 1;
+	}
+	return NULL;
+}
+
+#define MAX_SYMLEN 64
+static noinline char *strsym(char *buf, void *ip)
+{
+	unsigned short off;
+	unsigned short len;
+	char *p;
+
+	p = findsym((unsigned long)ip, &off, &len);
+	if (p) {
+		strscpy(buf, p, MAX_SYMLEN);
+		/* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
+		p = buf + strnlen(buf, MAX_SYMLEN - 15);
+		strscpy(p, "+0x", MAX_SYMLEN - (p - buf));
+		as_hex(p + 3, off, 0);
+		strcat(p, "/0x");
+		as_hex(p + strlen(p), len, 0);
+	} else {
+		as_hex(buf, (unsigned long)ip, 16);
+	}
+	return buf;
+}
+
+static inline int printk_loglevel(const char *buf)
+{
+	if (buf[0] == KERN_SOH_ASCII && buf[1]) {
+		switch (buf[1]) {
+		case '0' ... '7':
+			return buf[1] - '0';
+		}
+	}
+	return MESSAGE_LOGLEVEL_DEFAULT;
+}
+
+static void boot_console_earlyprintk(const char *buf)
+{
+	int level = printk_loglevel(buf);
+
+	/* always print emergency messages */
+	if (level > LOGLEVEL_EMERG && !boot_earlyprintk)
+		return;
+	buf = printk_skip_level(buf);
+	/* print debug messages only when bootdebug is enabled */
+	if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf))))
+		return;
+	if (boot_ignore_loglevel || level < boot_console_loglevel)
+		sclp_early_printk(buf);
+}
+
+static char *add_timestamp(char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+	unsigned long ns = tod_to_ns(__get_tod_clock_monotonic());
+	char ts[MAX_NUMLEN];
+
+	*buf++ = '[';
+	buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0);
+	*buf++ = '.';
+	buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0);
+	*buf++ = ']';
+	*buf++ = ' ';
+#endif
+	return buf;
+}
+
+#define va_arg_len_type(args, lenmod, typemod)				\
+	((lenmod == 'l') ? va_arg(args, typemod long) :			\
+	 (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) :	\
+	 (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) :	\
+	 (lenmod == 'z') ? va_arg(args, typemod long) :			\
+			   va_arg(args, typemod int))
+
+int boot_printk(const char *fmt, ...)
+{
+	char buf[1024] = { 0 };
+	char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
+	bool zero_pad, decimal;
+	char *strval, *p = buf;
+	char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)];
+	va_list args;
+	char lenmod;
+	ssize_t len;
+	int pad;
+
+	*p++ = KERN_SOH_ASCII;
+	*p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT;
+	p = add_timestamp(p);
+	fmt = printk_skip_level(fmt);
+
+	va_start(args, fmt);
+	for (; p < end && *fmt; fmt++) {
+		if (*fmt != '%') {
+			*p++ = *fmt;
+			continue;
+		}
+		if (*++fmt == '%') {
+			*p++ = '%';
+			continue;
+		}
+		zero_pad = (*fmt == '0');
+		pad = simple_strtol(fmt, (char **)&fmt, 10);
+		lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0;
+		if (lenmod == 'h' && *fmt == 'h') {
+			lenmod = 'H';
+			fmt++;
+		}
+		decimal = false;
+		switch (*fmt) {
+		case 's':
+			if (lenmod)
+				goto out;
+			strval = va_arg(args, char *);
+			zero_pad = false;
+			break;
+		case 'p':
+			if (*++fmt != 'S' || lenmod)
+				goto out;
+			strval = strsym(valbuf, va_arg(args, void *));
+			zero_pad = false;
+			break;
+		case 'd':
+		case 'i':
+			strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1);
+			decimal = true;
+			break;
+		case 'u':
+			strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
+			break;
+		case 'x':
+			strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
+			break;
+		default:
+			goto out;
+		}
+		len = strpad(p, end - p, strval, pad, zero_pad, decimal);
+		if (len == -E2BIG)
+			break;
+		p += len;
+	}
+out:
+	va_end(args);
+	len = strlen(buf);
+	if (len) {
+		boot_rb_add(buf, len);
+		boot_console_earlyprintk(buf);
+	}
+	return len;
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 863e6bcaa5a1..da8337e63a3e 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,69 +1,220 @@
 // SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "startup: " fmt
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <asm/page-states.h>
 #include <asm/boot_data.h>
+#include <asm/extmem.h>
 #include <asm/sections.h>
+#include <asm/diag288.h>
+#include <asm/maccess.h>
+#include <asm/machine.h>
+#include <asm/sysinfo.h>
 #include <asm/cpu_mf.h>
 #include <asm/setup.h>
+#include <asm/timex.h>
 #include <asm/kasan.h>
 #include <asm/kexec.h>
 #include <asm/sclp.h>
 #include <asm/diag.h>
 #include <asm/uv.h>
+#include <asm/abs_lowcore.h>
+#include <asm/physmem_info.h>
 #include "decompressor.h"
 #include "boot.h"
 #include "uv.h"
 
-unsigned long __bootdata_preserved(__kaslr_offset);
-unsigned long __bootdata(__amode31_base);
+struct vm_layout __bootdata_preserved(vm_layout);
+unsigned long __bootdata_preserved(__abs_lowcore);
+unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
 unsigned long __bootdata_preserved(VMALLOC_START);
 unsigned long __bootdata_preserved(VMALLOC_END);
 struct page *__bootdata_preserved(vmemmap);
 unsigned long __bootdata_preserved(vmemmap_size);
 unsigned long __bootdata_preserved(MODULES_VADDR);
 unsigned long __bootdata_preserved(MODULES_END);
-unsigned long __bootdata(ident_map_size);
-int __bootdata(is_full_image) = 1;
-struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata_preserved(max_mappable);
+unsigned long __bootdata_preserved(page_noexec_mask);
+unsigned long __bootdata_preserved(segment_noexec_mask);
+unsigned long __bootdata_preserved(region_noexec_mask);
+union tod_clock __bootdata_preserved(tod_clock_base);
+u64 __bootdata_preserved(clock_comparator_max) = -1UL;
 
 u64 __bootdata_preserved(stfle_fac_list[16]);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
 struct oldmem_data __bootdata_preserved(oldmem_data);
 
 void error(char *x)
 {
-	sclp_early_printk("\n\n");
-	sclp_early_printk(x);
-	sclp_early_printk("\n\n -- System halted");
-
+	boot_emerg("%s\n", x);
+	boot_emerg(" -- System halted\n");
 	disabled_wait();
 }
 
+static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static void detect_machine_type(void)
+{
+	struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
+	/* Check current-configuration-level */
+	if (stsi(NULL, 0, 0, 0) <= 2) {
+		set_machine_feature(MFEATURE_LPAR);
+		return;
+	}
+	/* Get virtual-machine cpu information. */
+	if (stsi(vmms, 3, 2, 2) || !vmms->count)
+		return;
+	/* Detect known hypervisors */
+	if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+		set_machine_feature(MFEATURE_KVM);
+	else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
+		set_machine_feature(MFEATURE_VM);
+}
+
+static void detect_diag288(void)
+{
+	/* "BEGIN" in EBCDIC character set */
+	static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5";
+	unsigned long action, len;
+
+	action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART;
+	len = machine_is_vm() ? sizeof(cmd) : 0;
+	if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len))
+		return;
+	__diag288(WDT_FUNC_CANCEL, 0, 0, 0);
+	set_machine_feature(MFEATURE_DIAG288);
+}
+
+static void detect_diag9c(void)
+{
+	unsigned int cpu;
+	int rc = 1;
+
+	cpu = stap();
+	asm_inline volatile(
+		"	diag	%[cpu],%%r0,0x9c\n"
+		"0:	lhi	%[rc],0\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [rc] "+d" (rc)
+		: [cpu] "d" (cpu)
+		: "cc", "memory");
+	if (!rc)
+		set_machine_feature(MFEATURE_DIAG9C);
+}
+
+static void reset_tod_clock(void)
+{
+	union tod_clock clk;
+
+	if (store_tod_clock_ext_cc(&clk) == 0)
+		return;
+	/* TOD clock not running. Set the clock to Unix Epoch. */
+	if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
+		disabled_wait();
+	memset(&tod_clock_base, 0, sizeof(tod_clock_base));
+	tod_clock_base.tod = TOD_UNIX_EPOCH;
+	get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
+}
+
+static void detect_facilities(void)
+{
+	if (cpu_has_edat1())
+		local_ctl_set_bit(0, CR0_EDAT_BIT);
+	page_noexec_mask = -1UL;
+	segment_noexec_mask = -1UL;
+	region_noexec_mask = -1UL;
+	if (!cpu_has_nx()) {
+		page_noexec_mask &= ~_PAGE_NOEXEC;
+		segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC;
+		region_noexec_mask &= ~_REGION_ENTRY_NOEXEC;
+	}
+	if (IS_ENABLED(CONFIG_PCI) && test_facility(153))
+		set_machine_feature(MFEATURE_PCI_MIO);
+	reset_tod_clock();
+	if (test_facility(139) && (tod_clock_base.tod >> 63)) {
+		/* Enable signed clock comparator comparisons */
+		set_machine_feature(MFEATURE_SCC);
+		clock_comparator_max = -1UL >> 1;
+		local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
+	}
+	if (test_facility(50) && test_facility(73)) {
+		set_machine_feature(MFEATURE_TX);
+		local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
+	}
+	if (cpu_has_vx())
+		local_ctl_set_bit(0, CR0_VECTOR_BIT);
+}
+
+static int cmma_test_essa(void)
+{
+	unsigned long tmp = 0;
+	int rc = 1;
+
+	/* Test ESSA_GET_STATE */
+	asm_inline volatile(
+		"	.insn	rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
+		"0:	lhi	%[rc],0\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: [rc] "+d" (rc), [tmp] "+d" (tmp)
+		: [cmd] "i" (ESSA_GET_STATE)
+		: "cc", "memory");
+	return rc;
+}
+
+static void cmma_init(void)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_test_essa()) {
+		cmma_flag = 0;
+		return;
+	}
+	if (test_facility(147))
+		cmma_flag = 2;
+}
+
 static void setup_lpp(void)
 {
-	S390_lowcore.current_pid = 0;
-	S390_lowcore.lpp = LPP_MAGIC;
+	get_lowcore()->current_pid = 0;
+	get_lowcore()->lpp = LPP_MAGIC;
 	if (test_facility(40))
-		lpp(&S390_lowcore.lpp);
+		lpp(&get_lowcore()->lpp);
 }
 
 #ifdef CONFIG_KERNEL_UNCOMPRESSED
-unsigned long mem_safe_offset(void)
+static unsigned long mem_safe_offset(void)
 {
-	return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+	return (unsigned long)_compressed_start;
+}
+
+static void deploy_kernel(void *output)
+{
+	void *uncompressed_start = (void *)_compressed_start;
+
+	if (output == uncompressed_start)
+		return;
+	memmove(output, uncompressed_start, vmlinux.image_size);
+	memset(uncompressed_start, 0, vmlinux.image_size);
 }
 #endif
 
-static void rescue_initrd(unsigned long addr)
+static void rescue_initrd(unsigned long min, unsigned long max)
 {
+	unsigned long old_addr, addr, size;
+
 	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
 		return;
-	if (!initrd_data.start || !initrd_data.size)
+	if (!get_physmem_reserved(RR_INITRD, &addr, &size))
 		return;
-	if (addr <= initrd_data.start)
+	if (addr >= min && addr + size <= max)
 		return;
-	memmove((void *)addr, (void *)initrd_data.start, initrd_data.size);
-	initrd_data.start = addr;
+	old_addr = addr;
+	physmem_free(RR_INITRD);
+	addr = physmem_alloc_or_die(RR_INITRD, size, 0);
+	memmove((void *)addr, (void *)old_addr, size);
 }
 
 static void copy_bootdata(void)
@@ -76,34 +227,33 @@ static void copy_bootdata(void)
 	memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
 }
 
-static void handle_relocs(unsigned long offset)
+static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
+				unsigned long offset, unsigned long phys_offset)
 {
-	Elf64_Rela *rela_start, *rela_end, *rela;
-	int r_type, r_sym, rc;
-	Elf64_Addr loc, val;
-	Elf64_Sym *dynsym;
-
-	rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
-	rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
-	dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
-	for (rela = rela_start; rela < rela_end; rela++) {
-		loc = rela->r_offset + offset;
-		val = rela->r_addend;
-		r_sym = ELF64_R_SYM(rela->r_info);
-		if (r_sym) {
-			if (dynsym[r_sym].st_shndx != SHN_UNDEF)
-				val += dynsym[r_sym].st_value + offset;
-		} else {
-			/*
-			 * 0 == undefined symbol table index (STN_UNDEF),
-			 * used for R_390_RELATIVE, only add KASLR offset
-			 */
-			val += offset;
-		}
-		r_type = ELF64_R_TYPE(rela->r_info);
-		rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
-		if (rc)
-			error("Unknown relocation type");
+	int *reloc;
+	long loc;
+
+	/* Adjust R_390_64 relocations */
+	for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) {
+		loc = (long)*reloc + phys_offset;
+		if (loc < min_addr || loc > max_addr)
+			error("64-bit relocation outside of kernel!\n");
+		*(u64 *)loc += offset;
+	}
+}
+
+static void kaslr_adjust_got(unsigned long offset)
+{
+	u64 *entry;
+
+	/*
+	 * Adjust GOT entries, except for ones for undefined weak symbols
+	 * that resolved to zero. This also skips the first three reserved
+	 * entries on s390x that are zero.
+	 */
+	for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+		if (*entry)
+			*entry += offset;
 	}
 }
 
@@ -111,13 +261,16 @@ static void handle_relocs(unsigned long offset)
  * Merge information from several sources into a single ident_map_size value.
  * "ident_map_size" represents the upper limit of physical memory we may ever
  * reach. It might not be all online memory, but also include standby (offline)
- * memory. "ident_map_size" could be lower then actual standby or even online
+ * memory or memory areas reserved for other means (e.g., memory devices such as
+ * virtio-mem).
+ *
+ * "ident_map_size" could be lower than actual standby/reserved or even online
  * memory present, due to limiting factors. We should never go above this limit.
  * It is the size of our identity mapping.
  *
  * Consider the following factors:
- * 1. max_physmem_end - end of physical memory online or standby.
- *    Always <= end of the last online memory block (get_mem_detect_end()).
+ * 1. max_physmem_end - end of physical memory online, standby or reserved.
+ *    Always >= end of the last online memory range (get_physmem_online_end()).
  * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
  *    kernel is able to support.
  * 3. "mem=" kernel command line option which limits physical memory usage.
@@ -137,76 +290,173 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
 
 #ifdef CONFIG_CRASH_DUMP
 	if (oldmem_data.start) {
-		kaslr_enabled = 0;
+		__kaslr_enabled = 0;
 		ident_map_size = min(ident_map_size, oldmem_data.size);
+		boot_debug("kdump memory limit:  0x%016lx\n", oldmem_data.size);
 	} else if (ipl_block_valid && is_ipl_block_dump()) {
-		kaslr_enabled = 0;
-		if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+		__kaslr_enabled = 0;
+		if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) {
 			ident_map_size = min(ident_map_size, hsa_size);
+			boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size);
+		}
 	}
 #endif
+	boot_debug("Identity map size:   0x%016lx\n", ident_map_size);
 }
 
-static void setup_kernel_memory_layout(void)
+#define FIXMAP_SIZE	round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
+
+static unsigned long get_vmem_size(unsigned long identity_size,
+				   unsigned long vmemmap_size,
+				   unsigned long vmalloc_size,
+				   unsigned long rte_size)
+{
+	unsigned long max_mappable, vsize;
+
+	max_mappable = max(identity_size, MAX_DCSS_ADDR);
+	vsize = round_up(SZ_2G + max_mappable, rte_size) +
+		round_up(vmemmap_size, rte_size) +
+		FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+	if (IS_ENABLED(CONFIG_KMSAN))
+		vsize += MODULES_LEN * 2;
+	return size_add(vsize, vmalloc_size);
+}
+
+static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
 {
 	unsigned long vmemmap_start;
+	unsigned long kernel_start;
+	unsigned long asce_limit;
 	unsigned long rte_size;
 	unsigned long pages;
+	unsigned long vsize;
+	unsigned long vmax;
 
 	pages = ident_map_size / PAGE_SIZE;
 	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
 	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
 
 	/* choose kernel address space layout: 4 or 3 levels. */
-	vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
-	if (IS_ENABLED(CONFIG_KASAN) ||
-	    vmalloc_size > _REGION2_SIZE ||
-	    vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
-		    _REGION2_SIZE) {
-		MODULES_END = _REGION1_SIZE;
-		rte_size = _REGION2_SIZE;
+	BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
+	BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
+	vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+	boot_debug("vmem size estimated: 0x%016lx\n", vsize);
+	if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
+	    (vsize > _REGION2_SIZE && kaslr_enabled())) {
+		asce_limit = _REGION1_SIZE;
+		if (__NO_KASLR_END_KERNEL > _REGION2_SIZE) {
+			rte_size = _REGION2_SIZE;
+			vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION2_SIZE);
+		} else {
+			rte_size = _REGION3_SIZE;
+		}
 	} else {
-		MODULES_END = _REGION2_SIZE;
+		asce_limit = _REGION2_SIZE;
 		rte_size = _REGION3_SIZE;
 	}
+
 	/*
-	 * forcing modules and vmalloc area under the ultravisor
+	 * Forcing modules and vmalloc area under the ultravisor
 	 * secure storage limit, so that any vmalloc allocation
 	 * we do could be used to back secure guest storage.
+	 *
+	 * Assume the secure storage limit always exceeds _REGION2_SIZE,
+	 * otherwise asce_limit and rte_size would have been adjusted.
 	 */
-	adjust_to_uv_max(&MODULES_END);
+	vmax = adjust_to_uv_max(asce_limit);
+	boot_debug("%d level paging       0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax);
 #ifdef CONFIG_KASAN
+	BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
+	boot_debug("KASAN shadow area:   0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END);
 	/* force vmalloc and modules below kasan shadow */
-	MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+	vmax = min(vmax, KASAN_SHADOW_START);
 #endif
+	vsize = min(vsize, vmax);
+	if (kaslr_enabled()) {
+		unsigned long kernel_end, kaslr_len, slots, pos;
+
+		kaslr_len = max(KASLR_LEN, vmax - vsize);
+		slots = DIV_ROUND_UP(kaslr_len - kernel_size, THREAD_SIZE);
+		if (get_random(slots, &pos))
+			pos = 0;
+		kernel_end = vmax - pos * THREAD_SIZE;
+		kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+		boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
+		boot_debug("kernel image:        0x%016lx-0x%016lx (kaslr)\n", kernel_start,
+			   kernel_start + kernel_size);
+	} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
+		kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
+		boot_debug("kernel image:        0x%016lx-0x%016lx (constrained)\n", kernel_start,
+			   kernel_start + kernel_size);
+	} else {
+		kernel_start = __NO_KASLR_START_KERNEL;
+		boot_debug("kernel image:        0x%016lx-0x%016lx (nokaslr)\n", kernel_start,
+			   kernel_start + kernel_size);
+	}
+	__kaslr_offset = kernel_start;
+	boot_debug("__kaslr_offset:      0x%016lx\n", __kaslr_offset);
+
+	MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
 	MODULES_VADDR = MODULES_END - MODULES_LEN;
 	VMALLOC_END = MODULES_VADDR;
+	if (IS_ENABLED(CONFIG_KMSAN))
+		VMALLOC_END -= MODULES_LEN * 2;
+	boot_debug("modules area:        0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END);
 
 	/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
-	vmalloc_size = min(vmalloc_size, round_down(VMALLOC_END / 2, _REGION3_SIZE));
+	vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
+	vsize = round_down(vsize, _SEGMENT_SIZE);
+	vmalloc_size = min(vmalloc_size, vsize);
+	if (IS_ENABLED(CONFIG_KMSAN)) {
+		/* take 2/3 of vmalloc area for KMSAN shadow and origins */
+		vmalloc_size = round_down(vmalloc_size / 3, _SEGMENT_SIZE);
+		VMALLOC_END -= vmalloc_size * 2;
+	}
 	VMALLOC_START = VMALLOC_END - vmalloc_size;
+	boot_debug("vmalloc area:        0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END);
+
+	__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+	boot_debug("memcpy real area:    0x%016lx-0x%016lx\n", __memcpy_real_area,
+		   __memcpy_real_area + MEMCPY_REAL_SIZE);
+	__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+				   sizeof(struct lowcore));
+	boot_debug("abs lowcore:         0x%016lx-0x%016lx\n", __abs_lowcore,
+		   __abs_lowcore + ABS_LOWCORE_MAP_SIZE);
 
 	/* split remaining virtual space between 1:1 mapping & vmemmap array */
-	pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+	pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
 	pages = SECTION_ALIGN_UP(pages);
 	/* keep vmemmap_start aligned to a top level region table entry */
-	vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
-	/* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
-	vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
+	vmemmap_start = round_down(__abs_lowcore - pages * sizeof(struct page), rte_size);
 	/* make sure identity map doesn't overlay with vmemmap */
 	ident_map_size = min(ident_map_size, vmemmap_start);
 	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
-	/* make sure vmemmap doesn't overlay with vmalloc area */
-	VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
+	/* make sure vmemmap doesn't overlay with absolute lowcore area */
+	if (vmemmap_start + vmemmap_size > __abs_lowcore) {
+		vmemmap_size = SECTION_ALIGN_DOWN(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+		ident_map_size = vmemmap_size / sizeof(struct page) * PAGE_SIZE;
+	}
 	vmemmap = (struct page *)vmemmap_start;
+	/* maximum address for which linear mapping could be created (DCSS, memory) */
+	BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
+	max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
+	max_mappable = min(max_mappable, vmemmap_start);
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
+	__identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#endif
+	boot_debug("identity map:        0x%016lx-0x%016lx\n", __identity_base,
+		   __identity_base + ident_map_size);
+
+	return asce_limit;
 }
 
 /*
  * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
  */
-static void clear_bss_section(void)
+static void clear_bss_section(unsigned long kernel_start)
 {
-	memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
+	memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size);
 }
 
 /*
@@ -223,78 +473,174 @@ static void setup_vmalloc_size(void)
 	vmalloc_size = max(size, vmalloc_size);
 }
 
-static void offset_vmlinux_info(unsigned long offset)
+static void kaslr_adjust_vmlinux_info(long offset)
 {
-	vmlinux.default_lma += offset;
-	*(unsigned long *)(&vmlinux.entry) += offset;
 	vmlinux.bootdata_off += offset;
 	vmlinux.bootdata_preserved_off += offset;
-	vmlinux.rela_dyn_start += offset;
-	vmlinux.rela_dyn_end += offset;
-	vmlinux.dynsym_start += offset;
-}
-
-static unsigned long reserve_amode31(unsigned long safe_addr)
-{
-	__amode31_base = PAGE_ALIGN(safe_addr);
-	return safe_addr + vmlinux.amode31_size;
+	vmlinux.got_start += offset;
+	vmlinux.got_end += offset;
+	vmlinux.init_mm_off += offset;
+	vmlinux.swapper_pg_dir_off += offset;
+	vmlinux.invalid_pg_dir_off += offset;
+	vmlinux.alt_instructions += offset;
+	vmlinux.alt_instructions_end += offset;
+#ifdef CONFIG_KASAN
+	vmlinux.kasan_early_shadow_page_off += offset;
+	vmlinux.kasan_early_shadow_pte_off += offset;
+	vmlinux.kasan_early_shadow_pmd_off += offset;
+	vmlinux.kasan_early_shadow_pud_off += offset;
+	vmlinux.kasan_early_shadow_p4d_off += offset;
+#endif
 }
 
 void startup_kernel(void)
 {
-	unsigned long random_lma;
+	unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
+	unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0;
+	unsigned long kernel_size = TEXT_OFFSET + vmlinux_size;
+	unsigned long kaslr_large_page_offset;
+	unsigned long max_physmem_end;
+	unsigned long asce_limit;
 	unsigned long safe_addr;
-	void *img;
-
-	initrd_data.start = parmarea.initrd_start;
-	initrd_data.size = parmarea.initrd_size;
-	oldmem_data.start = parmarea.oldmem_base;
-	oldmem_data.size = parmarea.oldmem_size;
+	psw_t psw;
 
 	setup_lpp();
 	store_ipl_parmblock();
-	safe_addr = mem_safe_offset();
-	safe_addr = reserve_amode31(safe_addr);
-	safe_addr = read_ipl_report(safe_addr);
 	uv_query_info();
-	rescue_initrd(safe_addr);
-	sclp_early_read_info();
 	setup_boot_command_line();
 	parse_boot_command_line();
+
+	/*
+	 * Non-randomized kernel physical start address must be _SEGMENT_SIZE
+	 * aligned (see below).
+	 */
+	nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
+	safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size);
+
+	/*
+	 * Reserve decompressor memory together with decompression heap,
+	 * buffer and memory which might be occupied by uncompressed kernel
+	 * (if KASLR is off or failed).
+	 */
+	physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
+	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
+		physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
+	oldmem_data.start = parmarea.oldmem_base;
+	oldmem_data.size = parmarea.oldmem_size;
+
+	read_ipl_report();
+	sclp_early_read_info();
+	sclp_early_detect_machine_features();
+	detect_facilities();
+	detect_diag9c();
+	detect_machine_type();
+	/* detect_diag288() needs machine type */
+	detect_diag288();
+	cmma_init();
 	sanitize_prot_virt_host();
-	setup_ident_map_size(detect_memory());
+	max_physmem_end = detect_max_physmem_end();
+	setup_ident_map_size(max_physmem_end);
 	setup_vmalloc_size();
-	setup_kernel_memory_layout();
-
-	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
-		random_lma = get_random_base(safe_addr);
-		if (random_lma) {
-			__kaslr_offset = random_lma - vmlinux.default_lma;
-			img = (void *)vmlinux.default_lma;
-			offset_vmlinux_info(__kaslr_offset);
-		}
+	asce_limit = setup_kernel_memory_layout(kernel_size);
+	/* got final ident_map_size, physmem allocations could be performed now */
+	physmem_set_usable_limit(ident_map_size);
+	detect_physmem_online_ranges(max_physmem_end);
+	save_ipl_cert_comp_list();
+	rescue_initrd(safe_addr, ident_map_size);
+
+	/*
+	 * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower
+	 * 20 bits (the offset within a large page) are zero. Copy the last
+	 * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to
+	 * __kaslr_offset_phys.
+	 *
+	 * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset
+	 * are identical, which is required to allow for large mappings of the
+	 * kernel image.
+	 */
+	kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
+	if (kaslr_enabled()) {
+		unsigned long size = vmlinux_size + kaslr_large_page_offset;
+
+		text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size);
 	}
+	if (!text_lma)
+		text_lma = nokaslr_text_lma;
+	text_lma |= kaslr_large_page_offset;
 
-	if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
-		img = decompress_kernel();
-		memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
-	} else if (__kaslr_offset)
-		memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+	/*
+	 * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is
+	 * never accessed via the kernel image mapping as per the linker script:
+	 *
+	 *	. = TEXT_OFFSET;
+	 *
+	 * Therefore, this region could be used for something else and does
+	 * not need to be reserved. See how it is skipped in setup_vmem().
+	 */
+	__kaslr_offset_phys = text_lma - TEXT_OFFSET;
+	kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
+	physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size);
+	deploy_kernel((void *)text_lma);
 
-	clear_bss_section();
-	copy_bootdata();
-	if (IS_ENABLED(CONFIG_RELOCATABLE))
-		handle_relocs(__kaslr_offset);
-
-	if (__kaslr_offset) {
-		/*
-		 * Save KASLR offset for early dumps, before vmcore_info is set.
-		 * Mark as uneven to distinguish from real vmcore_info pointer.
-		 */
-		S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
-		/* Clear non-relocated kernel */
-		if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
-			memset(img, 0, vmlinux.image_size);
+	/* vmlinux decompression is done, shrink reserved low memory */
+	physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
+
+	/*
+	 * In case KASLR is enabled the randomized location of .amode31
+	 * section might overlap with .vmlinux.relocs section. To avoid that
+	 * the below randomize_within_range() could have been called with
+	 * __vmlinux_relocs_64_end as the lower range address. However,
+	 * .amode31 section is written to by the decompressed kernel - at
+	 * that time the contents of .vmlinux.relocs is not needed anymore.
+	 * Conversely, .vmlinux.relocs is read only by the decompressor, even
+	 * before the kernel started. Therefore, in case the two sections
+	 * overlap there is no risk of corrupting any data.
+	 */
+	if (kaslr_enabled()) {
+		unsigned long amode31_min;
+
+		amode31_min = (unsigned long)_decompressor_end;
+		amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
 	}
-	vmlinux.entry();
+	if (!amode31_lma)
+		amode31_lma = text_lma - vmlinux.amode31_size;
+	physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
+
+	/*
+	 * The order of the following operations is important:
+	 *
+	 * - kaslr_adjust_relocs() must follow clear_bss_section() to establish
+	 *   static memory references to data in .bss to be used by setup_vmem()
+	 *   (i.e init_mm.pgd)
+	 *
+	 * - setup_vmem() must follow kaslr_adjust_relocs() to be able to use
+	 *   static memory references to data in .bss (i.e init_mm.pgd)
+	 *
+	 * - copy_bootdata() must follow setup_vmem() to propagate changes
+	 *   to bootdata made by setup_vmem()
+	 */
+	clear_bss_section(text_lma);
+	kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size,
+			    __kaslr_offset, __kaslr_offset_phys);
+	kaslr_adjust_got(__kaslr_offset);
+	setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
+	dump_physmem_reserved();
+	copy_bootdata();
+	__apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
+			     (struct alt_instr *)_vmlinux_info.alt_instructions_end,
+			     ALT_CTX_EARLY);
+
+	/*
+	 * Save KASLR offset for early dumps, before vmcore_info is set.
+	 * Mark as uneven to distinguish from real vmcore_info pointer.
+	 */
+	get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
+
+	/*
+	 * Jump to the decompressed kernel entry point and switch DAT mode on.
+	 */
+	psw.addr = __kaslr_offset + vmlinux.entry;
+	psw.mask = PSW_KERNEL_BITS;
+	boot_debug("Starting kernel at:  0x%016lx\n", psw.addr);
+	__load_psw(psw);
 }
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index faccb33b462c..bd68161434a6 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -1,11 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
+#define IN_BOOT_STRING_C 1
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #undef CONFIG_KASAN
 #undef CONFIG_KASAN_GENERIC
+#undef CONFIG_KMSAN
 #include "../lib/string.c"
 
+/*
+ * Duplicate some functions from the common lib/string.c
+ * instead of fully including it.
+ */
+
 int strncmp(const char *cs, const char *ct, size_t count)
 {
 	unsigned char c1, c2;
@@ -22,6 +29,27 @@ int strncmp(const char *cs, const char *ct, size_t count)
 	return 0;
 }
 
+ssize_t sized_strscpy(char *dst, const char *src, size_t count)
+{
+	size_t len;
+
+	if (count == 0)
+		return -E2BIG;
+	len = strnlen(src, count - 1);
+	memcpy(dst, src, len);
+	dst[len] = '\0';
+	return src[len] ? -E2BIG : len;
+}
+
+void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+	uint64_t *xs = s;
+
+	while (count--)
+		*xs++ = v;
+	return s;
+}
+
 char *skip_spaces(const char *str)
 {
 	while (isspace(*str))
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index e6be155ab2e5..4568e8f81dac 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -8,12 +8,8 @@
 #include "uv.h"
 
 /* will be used in arch/s390/kernel/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 int __bootdata_preserved(prot_virt_guest);
-#endif
-#if IS_ENABLED(CONFIG_KVM)
 int __bootdata_preserved(prot_virt_host);
-#endif
 struct uv_info __bootdata_preserved(uv_info);
 
 void uv_query_info(void)
@@ -26,8 +22,8 @@ void uv_query_info(void)
 	if (!test_facility(158))
 		return;
 
-	/* rc==0x100 means that there is additional data we do not process */
-	if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
+	/* Ignore that there might be more data we do not process */
+	if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != UVC_RC_MORE_DATA)
 		return;
 
 	if (IS_ENABLED(CONFIG_KVM)) {
@@ -41,20 +37,29 @@ void uv_query_info(void)
 		uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
 		uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
 		uv_info.uv_feature_indications = uvcb.uv_feature_indications;
+		uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions;
+		uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
+		uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
+		uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
+		uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
+		uv_info.supp_att_pflags = uvcb.supp_att_pflags;
+		uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver;
+		uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf;
+		uv_info.supp_secret_types = uvcb.supp_secret_types;
+		uv_info.max_assoc_secrets = uvcb.max_assoc_secrets;
+		uv_info.max_retr_secrets = uvcb.max_retr_secrets;
 	}
 
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 	if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
 	    test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
 		prot_virt_guest = 1;
-#endif
 }
 
-#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax)
+unsigned long adjust_to_uv_max(unsigned long limit)
 {
 	if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
-		*vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+		limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr);
+	return limit;
 }
 
 static int is_prot_virt_host_capable(void)
@@ -81,4 +86,3 @@ void sanitize_prot_virt_host(void)
 {
 	prot_virt_host = is_prot_virt_host_capable();
 }
-#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 690ce019af5a..da4a4a8d48e0 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -2,18 +2,8 @@
 #ifndef BOOT_UV_H
 #define BOOT_UV_H
 
-#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax);
+unsigned long adjust_to_uv_max(unsigned long limit);
 void sanitize_prot_virt_host(void);
-#else
-static inline void adjust_to_uv_max(unsigned long *vmax) {}
-static inline void sanitize_prot_virt_host(void) {}
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
 void uv_query_info(void);
-#else
-static inline void uv_query_info(void) {}
-#endif
 
 #endif /* BOOT_UV_H */
diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c
index d32e58bdda6a..fd32f038777f 100644
--- a/arch/s390/boot/version.c
+++ b/arch/s390/boot/version.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <generated/utsversion.h>
 #include <generated/utsrelease.h>
 #include <generated/compile.h>
 #include "boot.h"
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
new file mode 100644
index 000000000000..1d073acd05a7
--- /dev/null
+++ b/arch/s390/boot/vmem.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "vmem: " fmt
+#include <linux/cpufeature.h>
+#include <linux/sched/task.h>
+#include <linux/pgtable.h>
+#include <linux/kasan.h>
+#include <asm/page-states.h>
+#include <asm/pgalloc.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/ctlreg.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
+#include <asm/machine.h>
+#include <asm/abs_lowcore.h>
+#include "decompressor.h"
+#include "boot.h"
+
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
+struct ctlreg __bootdata_preserved(s390_invalid_asce);
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
+#endif
+
+#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
+#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
+#define invalid_pg_dir		vmlinux.invalid_pg_dir_off
+
+enum populate_mode {
+	POPULATE_NONE,
+	POPULATE_DIRECT,
+	POPULATE_LOWCORE,
+	POPULATE_ABS_LOWCORE,
+	POPULATE_IDENTITY,
+	POPULATE_KERNEL,
+#ifdef CONFIG_KASAN
+	/* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
+	POPULATE_KASAN_MAP_SHADOW,
+	POPULATE_KASAN_ZERO_SHADOW,
+	POPULATE_KASAN_SHALLOW
+#endif
+};
+
+#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
+static inline const char *get_populate_mode_name(enum populate_mode t)
+{
+	switch (t) {
+	POPULATE_MODE_NAME(NONE);
+	POPULATE_MODE_NAME(DIRECT);
+	POPULATE_MODE_NAME(LOWCORE);
+	POPULATE_MODE_NAME(ABS_LOWCORE);
+	POPULATE_MODE_NAME(IDENTITY);
+	POPULATE_MODE_NAME(KERNEL);
+#ifdef CONFIG_KASAN
+	POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
+	POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
+	POPULATE_MODE_NAME(KASAN_SHALLOW);
+#endif
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static bool is_kasan_populate_mode(enum populate_mode mode)
+{
+#ifdef CONFIG_KASAN
+	return mode >= POPULATE_KASAN_MAP_SHADOW;
+#else
+	return false;
+#endif
+}
+
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
+
+#ifdef CONFIG_KASAN
+
+#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
+#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
+#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
+#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
+#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
+#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static pte_t pte_z;
+
+static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
+{
+	unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
+	unsigned long sha_end = PAGE_ALIGN(__sha(end));
+
+	boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
+		   start, end, sha_start, sha_end);
+	pgtable_populate(sha_start, sha_end, mode);
+}
+
+static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
+{
+	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
+	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
+	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+	unsigned long memgap_start = 0;
+	unsigned long start, end;
+	int i;
+
+	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
+	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
+	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat(kasan_early_shadow_pte, 1);
+
+	for_each_physmem_usable_range(i, &start, &end) {
+		kasan_populate((unsigned long)__identity_va(start),
+			       (unsigned long)__identity_va(end),
+			       POPULATE_KASAN_MAP_SHADOW);
+		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
+			kasan_populate((unsigned long)__identity_va(memgap_start),
+				       (unsigned long)__identity_va(start),
+				       POPULATE_KASAN_ZERO_SHADOW);
+		}
+		memgap_start = end;
+	}
+	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
+	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
+	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
+	/* shallowly populate kasan shadow for vmalloc and modules */
+	kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
+	/* populate kasan shadow for untracked memory */
+	kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
+		       POPULATE_KASAN_ZERO_SHADOW);
+	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
+}
+
+static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
+		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
+		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
+					   unsigned long end, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
+	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
+		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
+		return true;
+	}
+	return false;
+}
+
+static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
+{
+	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
+		set_pte(pte, pte_z);
+		return true;
+	}
+	return false;
+}
+#else
+
+static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
+{
+}
+
+static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
+						  unsigned long end, enum populate_mode mode)
+{
+	return false;
+}
+
+static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
+{
+	return false;
+}
+
+#endif
+
+/*
+ * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
+ */
+static inline pte_t *__virt_to_kpte(unsigned long va)
+{
+	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
+}
+
+static void *boot_crst_alloc(unsigned long val)
+{
+	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
+	unsigned long *table;
+
+	table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
+	crst_table_init(table, val);
+	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
+	return table;
+}
+
+static pte_t *boot_pte_alloc(void)
+{
+	static void *pte_leftover;
+	pte_t *pte;
+
+	/*
+	 * handling pte_leftovers this way helps to avoid memory fragmentation
+	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
+	 */
+	if (!pte_leftover) {
+		pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+		pte = pte_leftover + _PAGE_TABLE_SIZE;
+		__arch_set_page_dat(pte, 1);
+	} else {
+		pte = pte_leftover;
+		pte_leftover = NULL;
+	}
+
+	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+	return pte;
+}
+
+static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
+					  enum populate_mode mode)
+{
+	switch (mode) {
+	case POPULATE_NONE:
+		return INVALID_PHYS_ADDR;
+	case POPULATE_DIRECT:
+		return addr;
+	case POPULATE_LOWCORE:
+		return __lowcore_pa(addr);
+	case POPULATE_ABS_LOWCORE:
+		return __abs_lowcore_pa(addr);
+	case POPULATE_KERNEL:
+		return __kernel_pa(addr);
+	case POPULATE_IDENTITY:
+		return __identity_pa(addr);
+#ifdef CONFIG_KASAN
+	case POPULATE_KASAN_MAP_SHADOW:
+		/* Allow to fail large page allocations, this will fall back to 1mb/4k pages */
+		addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
+		if (addr) {
+			memset((void *)addr, 0, size);
+			return addr;
+		}
+		return INVALID_PHYS_ADDR;
+#endif
+	default:
+		return INVALID_PHYS_ADDR;
+	}
+}
+
+static bool large_page_mapping_allowed(enum populate_mode mode)
+{
+	switch (mode) {
+	case POPULATE_DIRECT:
+	case POPULATE_IDENTITY:
+	case POPULATE_KERNEL:
+#ifdef CONFIG_KASAN
+	case POPULATE_KASAN_MAP_SHADOW:
+#endif
+		return true;
+	default:
+		return false;
+	}
+}
+
+static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
+					  enum populate_mode mode)
+{
+	unsigned long pa, size = end - addr;
+
+	if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
+	    !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
+		return INVALID_PHYS_ADDR;
+
+	pa = resolve_pa_may_alloc(addr, size, mode);
+	if (!IS_ALIGNED(pa, PUD_SIZE))
+		return INVALID_PHYS_ADDR;
+
+	return pa;
+}
+
+static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+					  enum populate_mode mode)
+{
+	unsigned long pa, size = end - addr;	/* size of [addr, end); pm_dir is not used below */
+
+	if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
+	    !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
+		return INVALID_PHYS_ADDR;	/* no PMD-level large mapping for this range */
+
+	pa = resolve_pa_may_alloc(addr, size, mode);
+	if (!IS_ALIGNED(pa, PMD_SIZE))	/* the physical side must be PMD_SIZE aligned as well */
+		return INVALID_PHYS_ADDR;
+
+	return pa;	/* physical address the caller puts into a large PMD entry */
+}
+
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long pages = 0;	/* number of PTEs written by this call */
+	pte_t *pte, entry;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {	/* one PTE per PAGE_SIZE step of [addr, end) */
+		if (pte_none(*pte)) {	/* entries that are already set are left untouched */
+			if (kasan_pte_populate_zero_shadow(pte, mode))
+				continue;	/* NOTE(review): presumably the helper filled the PTE - confirm */
+			entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
+			entry = set_pte_bit(entry, PAGE_KERNEL);
+			set_pte(pte, entry);
+			pages++;
+		}
+	}
+	if (mode == POPULATE_IDENTITY)	/* the page count is updated for identity mappings only */
+		update_page_count(PG_DIRECT_MAP_4K, pages);
+}
+
+static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long pa, next, pages = 0;	/* pages counts large PMD entries written here */
+	pmd_t *pmd, entry;
+	pte_t *pte;
+
+	pmd = pmd_offset(pud, addr);
+	for (; addr < end; addr = next, pmd++) {	/* walk [addr, end) one PMD entry at a time */
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd)) {
+			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
+				continue;	/* NOTE(review): presumably the helper filled the entry - confirm */
+			pa = try_get_large_pmd_pa(pmd, addr, next, mode);
+			if (pa != INVALID_PHYS_ADDR) {	/* large mapping possible: no page table needed */
+				entry = __pmd(pa);
+				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+				set_pmd(pmd, entry);
+				pages++;
+				continue;
+			}
+			pte = boot_pte_alloc();	/* otherwise hook up a new page table and fill it below */
+			pmd_populate(&init_mm, pmd, pte);
+		} else if (pmd_leaf(*pmd)) {	/* an existing leaf entry is left as it is */
+			continue;
+		}
+		pgtable_pte_populate(pmd, addr, next, mode);
+	}
+	if (mode == POPULATE_IDENTITY)	/* the page count is updated for identity mappings only */
+		update_page_count(PG_DIRECT_MAP_1M, pages);
+}
+
+static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long pa, next, pages = 0;	/* pages counts large PUD entries written here */
+	pud_t *pud, entry;
+	pmd_t *pmd;
+
+	pud = pud_offset(p4d, addr);
+	for (; addr < end; addr = next, pud++) {	/* walk [addr, end) one PUD entry at a time */
+		next = pud_addr_end(addr, end);
+		if (pud_none(*pud)) {
+			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
+				continue;	/* NOTE(review): presumably the helper filled the entry - confirm */
+			pa = try_get_large_pud_pa(pud, addr, next, mode);
+			if (pa != INVALID_PHYS_ADDR) {	/* large mapping possible: no lower table needed */
+				entry = __pud(pa);
+				entry = set_pud_bit(entry, REGION3_KERNEL);
+				set_pud(pud, entry);
+				pages++;
+				continue;
+			}
+			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);	/* new table, filled below */
+			pud_populate(&init_mm, pud, pmd);
+		} else if (pud_leaf(*pud)) {	/* an existing leaf entry is left as it is */
+			continue;
+		}
+		pgtable_pmd_populate(pud, addr, next, mode);
+	}
+	if (mode == POPULATE_IDENTITY)	/* the page count is updated for identity mappings only */
+		update_page_count(PG_DIRECT_MAP_2G, pages);
+}
+
+static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	p4d = p4d_offset(pgd, addr);
+	for (; addr < end; addr = next, p4d++) {	/* walk [addr, end) one P4D entry at a time */
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(*p4d)) {	/* empty entry: a lower level table has to be hooked up first */
+			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
+				continue;	/* NOTE(review): presumably the helper filled the entry - confirm */
+			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
+			p4d_populate(&init_mm, p4d, pud);
+		}
+		pgtable_pud_populate(p4d, addr, next, mode);	/* no large mappings at this level */
+	}
+}
+
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
+{
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	if (!is_kasan_populate_mode(mode)) {	/* the debug line is not printed for KASAN modes */
+		boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
+			   get_populate_mode_name(mode), addr, end,
+			   resolve_pa_may_alloc(addr, 0, mode),	/* size 0: only the translation is wanted */
+			   resolve_pa_may_alloc(end - 1, 0, mode) + 1);
+	}
+
+	pgd = pgd_offset(&init_mm, addr);
+	for (; addr < end; addr = next, pgd++) {	/* walk [addr, end) one PGD entry at a time */
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(*pgd)) {	/* empty entry: a lower level table has to be hooked up first */
+			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
+				continue;	/* NOTE(review): presumably the helper filled the entry - confirm */
+			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
+			pgd_populate(&init_mm, pgd, p4d);
+		}
+#ifdef CONFIG_KASAN
+		if (mode == POPULATE_KASAN_SHALLOW)	/* shallow mode stops once the PGD entry exists */
+			continue;
+#endif
+		pgtable_p4d_populate(pgd, addr, next, mode);
+	}
+}
+
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
+{
+	unsigned long lowcore_address = 0;	/* replaced below if the lowcore is relocated */
+	unsigned long start, end;
+	unsigned long asce_type;
+	unsigned long asce_bits;
+	pgd_t *init_mm_pgd;
+	int i;
+
+	/*
+	 * Mark whole memory as no-dat. This must be done before any
+	 * page tables are allocated, or kernel image builtin pages
+	 * are marked as dat tables.
+	 */
+	for_each_physmem_online_range(i, &start, &end)
+		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+
+	/*
+	 * init_mm->pgd contains the virtual address of swapper_pg_dir.
+	 * It is unusable at this stage since DAT is still off. Swap
+	 * it for the physical address of swapper_pg_dir and restore
+	 * the virtual address after all page tables are created.
+	 */
+	init_mm_pgd = init_mm.pgd;
+	init_mm.pgd = (pgd_t *)swapper_pg_dir;
+
+	if (asce_limit == _REGION1_SIZE) {	/* asce_limit selects the type of the top level table */
+		asce_type = _REGION2_ENTRY_EMPTY;
+		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+	} else {
+		asce_type = _REGION3_ENTRY_EMPTY;
+		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+	}
+	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+
+	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
+	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
+	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
+
+	if (machine_has_relocated_lowcore())
+		lowcore_address = LOWCORE_ALT_ADDRESS;
+
+	/*
+	 * To allow prefixing the lowcore must be mapped with 4KB pages.
+	 * To prevent creation of a large page at address 0 first map
+	 * the lowcore and create the identity mapping only afterwards.
+	 */
+	pgtable_populate(lowcore_address,
+			 lowcore_address + sizeof(struct lowcore),
+			 POPULATE_LOWCORE);
+	for_each_physmem_usable_range(i, &start, &end) {
+		pgtable_populate((unsigned long)__identity_va(start),
+				 (unsigned long)__identity_va(end),
+				 POPULATE_IDENTITY);
+	}
+
+	/*
+	 * [kernel_start..kernel_start + TEXT_OFFSET] region is never
+	 * accessed as per the linker script:
+	 *
+	 *	. = TEXT_OFFSET;
+	 *
+	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
+	 * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
+	 */
+	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
+	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
+	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
+			 POPULATE_ABS_LOWCORE);
+	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
+			 POPULATE_NONE);	/* this mode resolves to INVALID_PHYS_ADDR */
+	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));
+
+	kasan_populate_shadow(kernel_start, kernel_end);
+
+	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
+	get_lowcore()->user_asce = s390_invalid_asce;
+
+	local_ctl_load(1, &get_lowcore()->kernel_asce);	/* control registers 1 and 13: kernel ASCE */
+	local_ctl_load(7, &get_lowcore()->user_asce);	/* control register 7: the invalid ASCE */
+	local_ctl_load(13, &get_lowcore()->kernel_asce);
+
+	init_mm.context.asce = get_lowcore()->kernel_asce.val;
+	init_mm.pgd = init_mm_pgd;	/* put back the virtual address saved at the top */
+}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index af5c6860e0a1..50988022f9ea 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -31,6 +31,7 @@ SECTIONS
 		_text = .;	/* Text */
 		*(.text)
 		*(.text.*)
+		INIT_TEXT
 		_etext = . ;
 	}
 	.rodata : {
@@ -39,6 +40,10 @@ SECTIONS
 		*(.rodata.*)
 		_erodata = . ;
 	}
+	EXCEPTION_TABLE(16)
+	.got : {
+		*(.got)
+	}
 	NOTES
 	.data :	{
 		_data = . ;
@@ -93,8 +98,24 @@ SECTIONS
 		_decompressor_syms_end = .;
 	}
 
+	_decompressor_end = .;
+
+	. = ALIGN(4);
+	.vmlinux.relocs : {
+		__vmlinux_relocs_64_start = .;
+		*(.vmlinux.relocs_64)
+		__vmlinux_relocs_64_end = .;
+	}
+
 #ifdef CONFIG_KERNEL_UNCOMPRESSED
-	. = 0x100000;
+	. = ALIGN(PAGE_SIZE);
+	. += AMODE31_SIZE;		/* .amode31 section */
+
+	/*
+	 * Make sure the location counter is not less than TEXT_OFFSET.
+	 * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead.
+	 */
+	. = MAX(TEXT_OFFSET, ALIGN(1 << 20));
 #else
 	. = ALIGN(8);
 #endif
@@ -102,15 +123,49 @@ SECTIONS
 		_compressed_start = .;
 		*(.vmlinux.bin.compressed)
 		_compressed_end = .;
-		FILL(0xff);
-		. = ALIGN(4096);
+	}
+
+#define SB_TRAILER_SIZE 32
+	/* Trailer needed for Secure Boot */
+	. += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
+	. = ALIGN(4096) - SB_TRAILER_SIZE;
+	.sb.trailer : {
+		QUAD(0)
+		QUAD(0)
+		QUAD(0)
+		QUAD(0x000000207a49504c)
 	}
 	_end = .;
 
+	DWARF_DEBUG
+	ELF_DETAILS
+
+	/*
+	 * Make sure that the .got.plt is either completely empty or it
+	 * contains only the three reserved double words.
+	 */
+	.got.plt : {
+		*(.got.plt)
+	}
+	ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+	/*
+	 * Sections that should stay zero sized, which is safer to
+	 * explicitly check instead of blindly discarding.
+	 */
+	.plt : {
+		*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+	}
+	ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+	.rela.dyn : {
+		*(.rela.*) *(.rela_*)
+	}
+	ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
 	/* Sections to be discarded */
 	/DISCARD/ : {
+		COMMON_DISCARDS
 		*(.eh_frame)
-		*(__ex_table)
 		*(*__ksymtab*)
 		*(___kcrctab*)
 	}
diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config
new file mode 100644
index 000000000000..eb7f84f5925c
--- /dev/null
+++ b/arch/s390/configs/btf.config
@@ -0,0 +1,2 @@
+# Help: Enable BTF debug info
+CONFIG_DEBUG_INFO_BTF=y
diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config
new file mode 100644
index 000000000000..6fd051453ae8
--- /dev/null
+++ b/arch/s390/configs/compat.config
@@ -0,0 +1,3 @@
+# Help: Enable compat support
+CONFIG_COMPAT=y
+CONFIG_COMPAT_32BIT_TIME=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index f6dfde577ce8..8ecad727497e 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -23,7 +23,6 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_RDMA=y
 CONFIG_CGROUP_FREEZER=y
@@ -39,28 +38,26 @@ CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
 CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
 CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_SIG=y
+CONFIG_CERT_STORE=y
 CONFIG_EXPOLINE=y
 CONFIG_EXPOLINE_AUTO=y
 CONFIG_CHSC_SCH=y
 CONFIG_VFIO_CCW=m
 CONFIG_VFIO_AP=m
-CONFIG_CRASH_DUMP=y
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
+CONFIG_S390_HYPFS_FS=y
 CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
 CONFIG_S390_UNWIND_SELFTEST=m
 CONFIG_S390_KPROBES_SANITY_TEST=m
 CONFIG_S390_MODULES_SANITY_TEST=m
@@ -74,6 +71,7 @@ CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_MODULE_SIG_SHA256=y
@@ -91,25 +89,26 @@ CONFIG_MINIX_SUBPARTITION=y
 CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
-CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLAB_BUCKETS=y
+CONFIG_SLUB_STATS=y
+# CONFIG_COMPAT_BRK is not set
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA_DEBUG=y
 CONFIG_CMA_DEBUGFS=y
 CONFIG_CMA_SYSFS=y
 CONFIG_CMA_AREAS=7
 CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=y
-CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PERCPU_STATS=y
 CONFIG_GUP_TEST=y
 CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -117,8 +116,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
-CONFIG_SMC=m
 CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -133,7 +132,6 @@ CONFIG_IP_MROUTE=y
 CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -167,6 +165,8 @@ CONFIG_BRIDGE_NETFILTER=m
 CONFIG_NETFILTER_NETLINK_HOOK=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_PROCFS=y
 CONFIG_NF_CONNTRACK_EVENTS=y
 CONFIG_NF_CONNTRACK_TIMEOUT=y
 CONFIG_NF_CONNTRACK_TIMESTAMP=y
@@ -182,17 +182,39 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
-CONFIG_NFT_OBJREF=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_AUDIT=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -205,8 +227,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m
 CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
 CONFIG_NETFILTER_XT_TARGET_LOG=m
 CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
 CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
 CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -215,6 +239,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
 CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
 CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
 CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
 CONFIG_NETFILTER_XT_MATCH_COMMENT=m
 CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -229,6 +254,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
 CONFIG_NETFILTER_XT_MATCH_ESP=m
 CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
 CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
 CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
 CONFIG_NETFILTER_XT_MATCH_IPVS=m
 CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -246,6 +272,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -297,12 +324,10 @@ CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
 CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_FIB_IPV6=m
@@ -339,7 +364,6 @@ CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_PRIO=m
@@ -350,7 +374,6 @@ CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
 CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_DRR=m
 CONFIG_NET_SCH_MQPRIO=m
@@ -362,29 +385,32 @@ CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_ETS=m
 CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
 CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_CGROUP=y
 CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=m
 CONFIG_NET_ACT_GACT=m
 CONFIG_GACT_PROB=y
 CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
 CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
 CONFIG_VSOCKETS=m
@@ -401,8 +427,16 @@ CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
+CONFIG_ZRAM_BACKEND_LZ4=y
+CONFIG_ZRAM_BACKEND_LZ4HC=y
+CONFIG_ZRAM_BACKEND_ZSTD=y
+CONFIG_ZRAM_BACKEND_DEFLATE=y
+CONFIG_ZRAM_BACKEND_842=y
+CONFIG_ZRAM_BACKEND_LZO=y
+CONFIG_ZRAM_DEF_COMP_DEFLATE=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
@@ -438,9 +472,8 @@ CONFIG_SCSI_DH_EMC=m
 CONFIG_SCSI_DH_ALUA=m
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=y
+# CONFIG_MD_BITMAP_FILE is not set
 CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
 CONFIG_MD_CLUSTER=m
 CONFIG_BCACHE=m
 CONFIG_BLK_DEV_DM=y
@@ -465,8 +498,10 @@ CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
 CONFIG_DM_VERITY=m
 CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
 CONFIG_DM_SWITCH=m
 CONFIG_DM_INTEGRITY=m
+CONFIG_DM_VDO=m
 CONFIG_NETDEVICES=y
 CONFIG_BONDING=m
 CONFIG_DUMMY=m
@@ -493,7 +528,6 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_ASIX is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_CADENCE is not set
 # CONFIG_NET_VENDOR_CAVIUM is not set
 # CONFIG_NET_VENDOR_CHELSIO is not set
@@ -509,25 +543,27 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_GOOGLE is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MICROSOFT is not set
 # CONFIG_NET_VENDOR_LITEX is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
 CONFIG_MLX5_CORE=m
 CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_META is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_MICROCHIP is not set
 # CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
 # CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETERION is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
-# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_NVIDIA is not set
 # CONFIG_NET_VENDOR_OKI is not set
 # CONFIG_NET_VENDOR_PACKET_ENGINES is not set
 # CONFIG_NET_VENDOR_PENSANDO is not set
 # CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RDC is not set
 # CONFIG_NET_VENDOR_REALTEK is not set
@@ -535,9 +571,9 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SILAN is not set
 # CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
@@ -547,6 +583,7 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VERTEXCOM is not set
 # CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -566,6 +603,7 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_HANGCHECK_TIMER=m
@@ -577,28 +615,33 @@ CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_SOFT_WATCHDOG=m
 CONFIG_DIAG288_WATCHDOG=m
-# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
 CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID is not set
+# CONFIG_FB_DEVICE is not set
+# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
 CONFIG_MLX5_VFIO_PCI=m
-CONFIG_VFIO_MDEV=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_MEM=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_MLX5_VDPA_NET=m
+CONFIG_VP_VDPA=m
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
-CONFIG_S390_CCW_IOMMU=y
-CONFIG_S390_AP_IOMMU=y
+CONFIG_VHOST_VDPA=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -620,7 +663,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_BTRFS_DEBUG=y
 CONFIG_BTRFS_ASSERT=y
 CONFIG_NILFS2_FS=m
-CONFIG_FS_DAX=y
+CONFIG_BCACHEFS_FS=y
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
 CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FS_ENCRYPTION=y
 CONFIG_FS_VERITY=y
@@ -631,13 +676,13 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QUOTA_DEBUG=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
@@ -647,16 +692,16 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_EXFAT_FS=m
 CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_TMPFS_INODE64=y
+CONFIG_TMPFS_QUOTA=y
 CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
@@ -664,6 +709,7 @@ CONFIG_SQUASHFS_XZ=y
 CONFIG_SQUASHFS_ZSTD=y
 CONFIG_ROMFS_FS=m
 CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -671,6 +717,7 @@ CONFIG_NFSD=m
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
 CONFIG_CIFS=m
 CONFIG_CIFS_UPCALL=y
 CONFIG_CIFS_XATTR=y
@@ -688,61 +735,39 @@ CONFIG_NLS_UTF8=m
 CONFIG_DLM=m
 CONFIG_UNICODE=y
 CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_KEY_NOTIFICATIONS=y
 CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_HARDENED_USERCOPY=y
-CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_SECURITY_LOCKDOWN_LSM=y
 CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
 CONFIG_SECURITY_LANDLOCK=y
 CONFIG_INTEGRITY_SIGNATURE=y
 CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
 CONFIG_IMA=y
 CONFIG_IMA_DEFAULT_HASH_SHA256=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_BLAKE2S=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
 CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_DES=m
@@ -750,9 +775,26 @@ CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
@@ -762,28 +804,26 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_STATS=y
-CONFIG_ZCRYPT=m
-CONFIG_PKEY=m
-CONFIG_CRYPTO_PAES_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA3_256_S390=m
 CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_HMAC_S390=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_PKEY_CCA=m
+CONFIG_PKEY_EP11=m
+CONFIG_PKEY_PCKMO=m
+CONFIG_PKEY_UV=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CRYPTO_KRB5=m
+CONFIG_CRYPTO_KRB5_SELFTESTS=y
 CONFIG_CORDIC=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC32_SELFTEST=y
-CONFIG_CRC4=m
-CONFIG_CRC7=m
-CONFIG_CRC8=m
 CONFIG_RANDOM32_SELFTEST=y
 CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_DMA_CMA=y
@@ -791,12 +831,12 @@ CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_HEADERS_INSTALL=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SLUB_DEBUG_ON=y
 CONFIG_PAGE_OWNER=y
 CONFIG_DEBUG_RODATA_TEST=y
 CONFIG_DEBUG_WX=y
@@ -808,11 +848,10 @@ CONFIG_DEBUG_OBJECTS_TIMERS=y
 CONFIG_DEBUG_OBJECTS_WORK=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
-CONFIG_SLUB_DEBUG_ON=y
-CONFIG_SLUB_STATS=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VIRTUAL=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
@@ -824,21 +863,26 @@ CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_WQ_WATCHDOG=y
 CONFIG_TEST_LOCKUP=m
+CONFIG_DEBUG_PREEMPT=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
+CONFIG_LOCKDEP_BITS=16
+CONFIG_LOCKDEP_CHAINS_BITS=17
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
 CONFIG_DEBUG_IRQFLAGS=y
+CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_DEBUG_CREDENTIALS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 # CONFIG_RCU_TRACE is not set
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
+CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
 CONFIG_IRQSOFF_TRACER=y
@@ -846,15 +890,18 @@ CONFIG_PREEMPT_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_USER_EVENTS=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_FTRACE_STARTUP_TEST=y
 # CONFIG_EVENT_TRACE_STARTUP_TEST is not set
+CONFIG_FTRACE_SORT_STARTUP_TEST=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
 CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
 CONFIG_DEBUG_ENTRY=y
+CONFIG_STRICT_MM_TYPECHECKS=y
 CONFIG_CIO_INJECT=y
 CONFIG_KUNIT=m
 CONFIG_KUNIT_DEBUGFS=y
@@ -868,7 +915,7 @@ CONFIG_FAIL_MAKE_REQUEST=y
 CONFIG_FAIL_IO_TIMEOUT=y
 CONFIG_FAIL_FUTEX=y
 CONFIG_FAULT_INJECTION_DEBUG_FS=y
-CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION_CONFIGFS=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
 CONFIG_LKDTM=m
 CONFIG_TEST_MIN_HEAP=y
@@ -877,7 +924,5 @@ CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_STRING_SELFTEST=y
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_BPF=m
-CONFIG_TEST_LIVEPATCH=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 706df3a4a867..c13a77765162 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -21,7 +21,6 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_RDMA=y
 CONFIG_CGROUP_FREEZER=y
@@ -37,27 +36,24 @@ CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_SIG=y
 CONFIG_LIVEPATCH=y
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
+CONFIG_MARCH_Z13=y
 CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_SIG=y
+CONFIG_CERT_STORE=y
 CONFIG_EXPOLINE=y
 CONFIG_EXPOLINE_AUTO=y
 CONFIG_CHSC_SCH=y
 CONFIG_VFIO_CCW=m
 CONFIG_VFIO_AP=m
-CONFIG_CRASH_DUMP=y
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
+CONFIG_S390_HYPFS_FS=y
 CONFIG_KVM=m
 CONFIG_S390_UNWIND_SELFTEST=m
 CONFIG_S390_KPROBES_SANITY_TEST=m
@@ -69,6 +65,7 @@ CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_MODULE_SIG_SHA256=y
@@ -86,8 +83,11 @@ CONFIG_MINIX_SUBPARTITION=y
 CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
-CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLAB_BUCKETS=y
+# CONFIG_COMPAT_BRK is not set
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -95,13 +95,11 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA_SYSFS=y
 CONFIG_CMA_AREAS=7
 CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=y
-CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PERCPU_STATS=y
 CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -109,8 +107,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
-CONFIG_SMC=m
 CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -125,7 +123,6 @@ CONFIG_IP_MROUTE=y
 CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
@@ -159,6 +156,8 @@ CONFIG_BRIDGE_NETFILTER=m
 CONFIG_NETFILTER_NETLINK_HOOK=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CONNTRACK_PROCFS=y
 CONFIG_NF_CONNTRACK_EVENTS=y
 CONFIG_NF_CONNTRACK_TIMEOUT=y
 CONFIG_NF_CONNTRACK_TIMESTAMP=y
@@ -174,17 +173,39 @@ CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
 CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
-CONFIG_NFT_OBJREF=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_AUDIT=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -197,8 +218,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m
 CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
 CONFIG_NETFILTER_XT_TARGET_LOG=m
 CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
 CONFIG_NETFILTER_XT_TARGET_NFLOG=m
 CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
 CONFIG_NETFILTER_XT_TARGET_TEE=m
 CONFIG_NETFILTER_XT_TARGET_TPROXY=m
 CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -207,6 +230,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
 CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
 CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
 CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
 CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
 CONFIG_NETFILTER_XT_MATCH_COMMENT=m
 CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -221,6 +245,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
 CONFIG_NETFILTER_XT_MATCH_ESP=m
 CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
 CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
 CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
 CONFIG_NETFILTER_XT_MATCH_IPVS=m
 CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -238,6 +263,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
 CONFIG_NETFILTER_XT_MATCH_RATEEST=m
 CONFIG_NETFILTER_XT_MATCH_REALM=m
 CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -289,12 +315,10 @@ CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
 CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NFT_FIB_IPV6=m
@@ -330,7 +354,6 @@ CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_PRIO=m
@@ -341,7 +364,6 @@ CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
 CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_DRR=m
 CONFIG_NET_SCH_MQPRIO=m
@@ -353,29 +375,32 @@ CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_ETS=m
 CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
 CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_CGROUP=y
 CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_EMATCH=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=m
 CONFIG_NET_ACT_GACT=m
 CONFIG_GACT_PROB=y
 CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
 CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
 CONFIG_VSOCKETS=m
@@ -392,8 +417,16 @@ CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_UEVENT_HELPER=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
+CONFIG_ZRAM_BACKEND_LZ4=y
+CONFIG_ZRAM_BACKEND_LZ4HC=y
+CONFIG_ZRAM_BACKEND_ZSTD=y
+CONFIG_ZRAM_BACKEND_DEFLATE=y
+CONFIG_ZRAM_BACKEND_842=y
+CONFIG_ZRAM_BACKEND_LZO=y
+CONFIG_ZRAM_DEF_COMP_DEFLATE=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
@@ -429,9 +462,8 @@ CONFIG_SCSI_DH_EMC=m
 CONFIG_SCSI_DH_ALUA=m
 CONFIG_MD=y
 CONFIG_BLK_DEV_MD=y
+# CONFIG_MD_BITMAP_FILE is not set
 CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
 CONFIG_MD_CLUSTER=m
 CONFIG_BCACHE=m
 CONFIG_BLK_DEV_DM=y
@@ -456,8 +488,10 @@ CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
 CONFIG_DM_VERITY=m
 CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
+CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING=y
 CONFIG_DM_SWITCH=m
 CONFIG_DM_INTEGRITY=m
+CONFIG_DM_VDO=m
 CONFIG_NETDEVICES=y
 CONFIG_BONDING=m
 CONFIG_DUMMY=m
@@ -484,7 +518,6 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_ASIX is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_CADENCE is not set
 # CONFIG_NET_VENDOR_CAVIUM is not set
 # CONFIG_NET_VENDOR_CHELSIO is not set
@@ -500,25 +533,27 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_GOOGLE is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MICROSOFT is not set
 # CONFIG_NET_VENDOR_LITEX is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
 CONFIG_MLX5_CORE=m
 CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_META is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_MICROCHIP is not set
 # CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
 # CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETERION is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
-# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_NVIDIA is not set
 # CONFIG_NET_VENDOR_OKI is not set
 # CONFIG_NET_VENDOR_PACKET_ENGINES is not set
 # CONFIG_NET_VENDOR_PENSANDO is not set
 # CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RDC is not set
 # CONFIG_NET_VENDOR_REALTEK is not set
@@ -526,9 +561,9 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SILAN is not set
 # CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
@@ -538,6 +573,7 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VERTEXCOM is not set
 # CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -557,6 +593,7 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
+# CONFIG_LEGACY_TIOCSTI is not set
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_HANGCHECK_TIMER=m
@@ -568,27 +605,33 @@ CONFIG_WATCHDOG_CORE=y
 CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_SOFT_WATCHDOG=m
 CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
 CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID is not set
+# CONFIG_FB_DEVICE is not set
+# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
 CONFIG_MLX5_VFIO_PCI=m
-CONFIG_VFIO_MDEV=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_MEM=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_VDPA=m
+CONFIG_VDPA_SIM=m
+CONFIG_VDPA_SIM_NET=m
+CONFIG_VDPA_SIM_BLOCK=m
+CONFIG_VDPA_USER=m
+CONFIG_MLX5_VDPA_NET=m
+CONFIG_VP_VDPA=m
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
-CONFIG_S390_CCW_IOMMU=y
-CONFIG_S390_AP_IOMMU=y
+CONFIG_VHOST_VDPA=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -607,7 +650,9 @@ CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
-CONFIG_FS_DAX=y
+CONFIG_BCACHEFS_FS=m
+CONFIG_BCACHEFS_QUOTA=y
+CONFIG_BCACHEFS_POSIX_ACL=y
 CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FS_ENCRYPTION=y
 CONFIG_FS_VERITY=y
@@ -617,13 +662,13 @@ CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
@@ -633,16 +678,17 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_EXFAT_FS=m
 CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_TMPFS_INODE64=y
+CONFIG_TMPFS_QUOTA=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
@@ -650,6 +696,7 @@ CONFIG_SQUASHFS_XZ=y
 CONFIG_SQUASHFS_ZSTD=y
 CONFIG_ROMFS_FS=m
 CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=m
 CONFIG_NFS_SWAP=y
@@ -657,6 +704,7 @@ CONFIG_NFSD=m
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_V4_SECURITY_LABEL=y
+# CONFIG_NFSD_LEGACY_CLIENT_TRACKING is not set
 CONFIG_CIFS=m
 CONFIG_CIFS_UPCALL=y
 CONFIG_CIFS_XATTR=y
@@ -674,61 +722,38 @@ CONFIG_NLS_UTF8=m
 CONFIG_DLM=m
 CONFIG_UNICODE=y
 CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_KEY_NOTIFICATIONS=y
 CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_SECURITY_LOCKDOWN_LSM=y
 CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
 CONFIG_SECURITY_LANDLOCK=y
 CONFIG_INTEGRITY_SIGNATURE=y
 CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
 CONFIG_IMA=y
 CONFIG_IMA_DEFAULT_HASH_SHA256=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_APPRAISE=y
-CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_CRYPTO_FIPS=y
 CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_SELFTESTS=y
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
-CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_BLAKE2S=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
 CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_DES=m
@@ -736,47 +761,63 @@ CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
 CONFIG_CRYPTO_ZSTD=m
 CONFIG_CRYPTO_ANSI_CPRNG=m
+CONFIG_CRYPTO_JITTERENTROPY_OSR=1
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_STATS=y
-CONFIG_ZCRYPT=m
-CONFIG_PKEY=m
-CONFIG_CRYPTO_PAES_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA3_256_S390=m
 CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CHACHA_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_HMAC_S390=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_PKEY_CCA=m
+CONFIG_PKEY_EP11=m
+CONFIG_PKEY_PCKMO=m
+CONFIG_PKEY_UV=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_CRYPTO_KRB5=m
+CONFIG_CRYPTO_KRB5_SELFTESTS=y
 CONFIG_CORDIC=m
 CONFIG_PRIME_NUMBERS=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_CRC4=m
-CONFIG_CRC7=m
-CONFIG_CRC8=m
 CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
@@ -785,23 +826,26 @@ CONFIG_PTDUMP_DEBUGFS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_TEST_LOCKUP=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
+CONFIG_FUNCTION_GRAPH_RETADDR=y
+CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_USER_EVENTS=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
 CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
 CONFIG_KUNIT=m
 CONFIG_KUNIT_DEBUGFS=y
 CONFIG_LKDTM=m
@@ -809,4 +853,3 @@ CONFIG_KPROBES_SANITY_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BPF=m
-CONFIG_TEST_LIVEPATCH=m
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
new file mode 100644
index 000000000000..cefbe2ba1228
--- /dev/null
+++ b/arch/s390/configs/kasan.config
@@ -0,0 +1,4 @@
+# Help: Enable KASan for debugging
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000
diff --git a/arch/s390/configs/mmtypes.config b/arch/s390/configs/mmtypes.config
new file mode 100644
index 000000000000..fe32b442d789
--- /dev/null
+++ b/arch/s390/configs/mmtypes.config
@@ -0,0 +1,2 @@
+# Help: Enable strict memory management typechecks
+CONFIG_STRICT_MM_TYPECHECKS=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index f4976f611b94..8163c1702720 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,4 +1,3 @@
-# CONFIG_SWAP is not set
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BPF_SYSCALL=y
@@ -9,27 +8,23 @@ CONFIG_BPF_SYSCALL=y
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_MARCH_ZEC12=y
-CONFIG_TUNE_ZEC12=y
-# CONFIG_COMPAT is not set
+CONFIG_KEXEC=y
+CONFIG_MARCH_Z13=y
 CONFIG_NR_CPUS=2
 CONFIG_HZ_100=y
-# CONFIG_RELOCATABLE is not set
 # CONFIG_CHSC_SCH is not set
 # CONFIG_SCM_BUS is not set
-CONFIG_CRASH_DUMP=y
+# CONFIG_AP is not set
 # CONFIG_PFAULT is not set
-# CONFIG_S390_HYPFS_FS is not set
+# CONFIG_S390_HYPFS is not set
 # CONFIG_VIRTUALIZATION is not set
 # CONFIG_S390_GUEST is not set
 # CONFIG_SECCOMP is not set
-# CONFIG_GCC_PLUGINS is not set
 # CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-# CONFIG_COMPACTION is not set
-# CONFIG_MIGRATION is not set
+# CONFIG_SWAP is not set
+# CONFIG_COMPAT_BRK is not set
 CONFIG_NET=y
 # CONFIG_IUCV is not set
 # CONFIG_PCPU_DEV_REFCNT is not set
@@ -50,14 +45,16 @@ CONFIG_ZFCP=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
+# CONFIG_LEGACY_TIOCSTI is not set
 # CONFIG_HVC_IUCV is not set
 # CONFIG_HW_RANDOM_S390 is not set
 # CONFIG_HMC_DRV is not set
+# CONFIG_S390_UV_UAPI is not set
 # CONFIG_S390_TAPE is not set
 # CONFIG_VMCP is not set
 # CONFIG_MONWRITER is not set
 # CONFIG_S390_VMUR is not set
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_VHOST_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
@@ -65,17 +62,14 @@ CONFIG_ZFCP=y
 # CONFIG_INOTIFY_USER is not set
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_LSM="yama,loadpin,safesetid,integrity"
 # CONFIG_ZLIB_DFLTCC is not set
 CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_SYMBOLIC_ERRNAME is not set
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
 CONFIG_DEBUG_FS=y
 CONFIG_PANIC_ON_OOPS=y
-# CONFIG_SCHED_DEBUG is not set
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 # CONFIG_RCU_TRACE is not set
 # CONFIG_FTRACE is not set
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
new file mode 100644
index 000000000000..e2c27588b21a
--- /dev/null
+++ b/arch/s390/crypto/Kconfig
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (s390)"
+
+config CRYPTO_SHA512_S390
+	tristate "Hash functions: SHA-384 and SHA-512"
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z10.
+
+config CRYPTO_SHA1_S390
+	tristate "Hash functions: SHA-1"
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z990.
+
+config CRYPTO_SHA3_256_S390
+	tristate "Hash functions: SHA3-224 and SHA3-256"
+	select CRYPTO_HASH
+	help
+	  SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
+
+	  Architecture: s390
+
+	  It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+	tristate "Hash functions: SHA3-384 and SHA3-512"
+	select CRYPTO_HASH
+	help
+	  SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
+
+	  Architecture: s390
+
+	  It is available as of z14.
+
+config CRYPTO_GHASH_S390
+	tristate "Hash functions: GHASH"
+	select CRYPTO_HASH
+	help
+	  GCM GHASH hash function (NIST SP800-38D)
+
+	  Architecture: s390
+
+	  It is available as of z196.
+
+config CRYPTO_AES_S390
+	tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	help
+	  Block cipher: AES cipher algorithms (FIPS 197)
+	  AEAD cipher: AES with GCM
+	  Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes
+
+	  Architecture: s390
+
+	  As of z9 the ECB and CBC modes are hardware accelerated
+	  for 128 bit keys.
+
+	  As of z10 the ECB and CBC modes are hardware accelerated
+	  for all AES key sizes.
+
+	  As of z196 the CTR mode is hardware accelerated for all AES
+	  key sizes and XTS mode is hardware accelerated for 256 and
+	  512 bit keys.
+
+config CRYPTO_DES_S390
+	tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_DES
+	help
+	  Block ciphers: DES (FIPS 46-2) cipher algorithm
+	  Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm
+	  Length-preserving ciphers: DES with ECB, CBC, and CTR modes
+	  Length-preserving ciphers: Triple DES EDE with ECB, CBC, and CTR modes
+
+	  Architecture: s390
+
+	  As of z990 the ECB and CBC modes are hardware accelerated.
+	  As of z196 the CTR mode is hardware accelerated.
+
+config CRYPTO_HMAC_S390
+	tristate "Keyed-hash message authentication code: HMAC"
+	select CRYPTO_HASH
+	help
+	  s390 specific HMAC hardware support for SHA224, SHA256, SHA384 and
+	  SHA512.
+
+	  Architecture: s390
+
+endmenu
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 1b1cc478fa94..21757d86cd49 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -4,18 +4,13 @@
 #
 
 obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
-obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA3_256_S390) += sha3_256_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
-obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
 obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
-obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
+obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
 obj-y += arch_random.o
-
-crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
-chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 1023e9d43d44..5d36f4020dfa 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -51,8 +51,13 @@ struct s390_aes_ctx {
 };
 
 struct s390_xts_ctx {
-	u8 key[32];
-	u8 pcc_key[32];
+	union {
+		u8 keys[64];
+		struct {
+			u8 key[32];
+			u8 pcc_key[32];
+		};
+	};
 	int key_len;
 	unsigned long fc;
 	struct crypto_skcipher *fallback;
@@ -61,7 +66,6 @@ struct s390_xts_ctx {
 struct gcm_sg_walk {
 	struct scatter_walk walk;
 	unsigned int walk_bytes;
-	u8 *walk_ptr;
 	unsigned int walk_bytes_remain;
 	u8 buf[AES_BLOCK_SIZE];
 	unsigned int buf_bytes;
@@ -398,10 +402,6 @@ static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 	if (err)
 		return err;
 
-	/* In fips mode only 128 bit or 256 bit keys are valid */
-	if (fips_enabled && key_len != 32 && key_len != 64)
-		return -EINVAL;
-
 	/* Pick the correct function code based on the key length */
 	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
 	     (key_len == 64) ? CPACF_KM_XTS_256 : 0;
@@ -530,6 +530,108 @@ static struct skcipher_alg xts_aes_alg = {
 	.decrypt		=	xts_aes_decrypt,
 };
 
+static int fullxts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			       unsigned int key_len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+	unsigned long fc;
+	int err;
+
+	err = xts_fallback_setkey(tfm, in_key, key_len);
+	if (err)
+		return err;
+
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 32) ? CPACF_KM_XTS_128_FULL :
+	     (key_len == 64) ? CPACF_KM_XTS_256_FULL : 0;
+
+	/* Check if the function code is available */
+	xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!xts_ctx->fc)
+		return 0;
+
+	/* Store double-key */
+	memcpy(xts_ctx->keys, in_key, key_len);
+	xts_ctx->key_len = key_len;
+	return 0;
+}
+
+static int fullxts_aes_crypt(struct skcipher_request *req,  unsigned long modifier)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+	unsigned int offset, nbytes, n;
+	struct skcipher_walk walk;
+	int ret;
+	struct {
+		__u8 key[64];
+		__u8 tweak[16];
+		__u8 nap[16];
+	} fxts_param = {
+		.nap = {0},
+	};
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+		struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+		*subreq = *req;
+		skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+		return (modifier & CPACF_DECRYPT) ?
+			crypto_skcipher_decrypt(subreq) :
+			crypto_skcipher_encrypt(subreq);
+	}
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	offset = xts_ctx->key_len & 0x20;
+	memcpy(fxts_param.key + offset, xts_ctx->keys, xts_ctx->key_len);
+	memcpy(fxts_param.tweak, req->iv, AES_BLOCK_SIZE);
+	fxts_param.nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+
+	while ((nbytes = walk.nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_km(xts_ctx->fc | modifier, fxts_param.key + offset,
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
+	}
+	memzero_explicit(&fxts_param, sizeof(fxts_param));
+	return ret;
+}
+
+static int fullxts_aes_encrypt(struct skcipher_request *req)
+{
+	return fullxts_aes_crypt(req, 0);
+}
+
+static int fullxts_aes_decrypt(struct skcipher_request *req)
+{
+	return fullxts_aes_crypt(req, CPACF_DECRYPT);
+}
+
+static struct skcipher_alg fullxts_aes_alg = {
+	.base.cra_name		=	"xts(aes)",
+	.base.cra_driver_name	=	"full-xts-aes-s390",
+	.base.cra_priority	=	403,	/* aes-xts-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_xts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	xts_fallback_init,
+	.exit			=	xts_fallback_exit,
+	.min_keysize		=	2 * AES_MIN_KEY_SIZE,
+	.max_keysize		=	2 * AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	fullxts_aes_set_key,
+	.encrypt		=	fullxts_aes_encrypt,
+	.decrypt		=	fullxts_aes_decrypt,
+};
+
 static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
@@ -601,7 +703,9 @@ static int ctr_aes_crypt(struct skcipher_request *req)
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
-		cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+		memset(buf, 0, AES_BLOCK_SIZE);
+		memcpy(buf, walk.src.virt.addr, nbytes);
+		cpacf_kmctr(sctx->fc, sctx->key, buf, buf,
 			    AES_BLOCK_SIZE, walk.iv);
 		memcpy(walk.dst.virt.addr, buf, nbytes);
 		crypto_inc(walk.iv, AES_BLOCK_SIZE);
@@ -682,29 +786,20 @@ static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg,
 
 static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw)
 {
-	struct scatterlist *nextsg;
-
-	gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain);
-	while (!gw->walk_bytes) {
-		nextsg = sg_next(gw->walk.sg);
-		if (!nextsg)
-			return 0;
-		scatterwalk_start(&gw->walk, nextsg);
-		gw->walk_bytes = scatterwalk_clamp(&gw->walk,
-						   gw->walk_bytes_remain);
-	}
-	gw->walk_ptr = scatterwalk_map(&gw->walk);
+	if (gw->walk_bytes_remain == 0)
+		return 0;
+	gw->walk_bytes = scatterwalk_next(&gw->walk, gw->walk_bytes_remain);
 	return gw->walk_bytes;
 }
 
 static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
-					     unsigned int nbytes)
+					     unsigned int nbytes, bool out)
 {
 	gw->walk_bytes_remain -= nbytes;
-	scatterwalk_unmap(gw->walk_ptr);
-	scatterwalk_advance(&gw->walk, nbytes);
-	scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
-	gw->walk_ptr = NULL;
+	if (out)
+		scatterwalk_done_dst(&gw->walk, nbytes);
+	else
+		scatterwalk_done_src(&gw->walk, nbytes);
 }
 
 static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
@@ -730,16 +825,16 @@ static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
 	}
 
 	if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) {
-		gw->ptr = gw->walk_ptr;
+		gw->ptr = gw->walk.addr;
 		gw->nbytes = gw->walk_bytes;
 		goto out;
 	}
 
 	while (1) {
 		n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes);
-		memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n);
+		memcpy(gw->buf + gw->buf_bytes, gw->walk.addr, n);
 		gw->buf_bytes += n;
-		_gcm_sg_unmap_and_advance(gw, n);
+		_gcm_sg_unmap_and_advance(gw, n, false);
 		if (gw->buf_bytes >= minbytesneeded) {
 			gw->ptr = gw->buf;
 			gw->nbytes = gw->buf_bytes;
@@ -771,13 +866,12 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
 	}
 
 	if (gw->walk_bytes >= minbytesneeded) {
-		gw->ptr = gw->walk_ptr;
+		gw->ptr = gw->walk.addr;
 		gw->nbytes = gw->walk_bytes;
 		goto out;
 	}
 
-	scatterwalk_unmap(gw->walk_ptr);
-	gw->walk_ptr = NULL;
+	scatterwalk_unmap(&gw->walk);
 
 	gw->ptr = gw->buf;
 	gw->nbytes = sizeof(gw->buf);
@@ -799,7 +893,7 @@ static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
 		} else
 			gw->buf_bytes = 0;
 	} else
-		_gcm_sg_unmap_and_advance(gw, bytesdone);
+		_gcm_sg_unmap_and_advance(gw, bytesdone, false);
 
 	return bytesdone;
 }
@@ -816,11 +910,11 @@ static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone)
 			if (!_gcm_sg_clamp_and_map(gw))
 				return i;
 			n = min(gw->walk_bytes, bytesdone - i);
-			memcpy(gw->walk_ptr, gw->buf + i, n);
-			_gcm_sg_unmap_and_advance(gw, n);
+			memcpy(gw->walk.addr, gw->buf + i, n);
+			_gcm_sg_unmap_and_advance(gw, n, true);
 		}
 	} else
-		_gcm_sg_unmap_and_advance(gw, bytesdone);
+		_gcm_sg_unmap_and_advance(gw, bytesdone, true);
 
 	return bytesdone;
 }
@@ -957,7 +1051,7 @@ static struct aead_alg gcm_aes_aead = {
 };
 
 static struct crypto_alg *aes_s390_alg;
-static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static struct skcipher_alg *aes_s390_skcipher_algs[5];
 static int aes_s390_skciphers_num;
 static struct aead_alg *aes_s390_aead_alg;
 
@@ -1014,6 +1108,13 @@ static int __init aes_s390_init(void)
 			goto out_err;
 	}
 
+	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128_FULL) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256_FULL)) {
+		ret = aes_s390_register_skcipher(&fullxts_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
 	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
 		ret = aes_s390_register_skcipher(&xts_aes_alg);
@@ -1049,11 +1150,11 @@ out_err:
 	return ret;
 }
 
-module_cpu_feature_match(MSA, aes_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, aes_s390_init);
 module_exit(aes_s390_fini);
 
 MODULE_ALIAS_CRYPTO("aes-all");
 
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
 MODULE_LICENSE("GPL");
-MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+MODULE_IMPORT_NS("CRYPTO_INTERNAL");
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 1f2d40993c4d..a8a2407381af 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -10,6 +10,7 @@
 #include <linux/atomic.h>
 #include <linux/random.h>
 #include <linux/static_key.h>
+#include <asm/archrandom.h>
 #include <asm/cpacf.h>
 
 DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
deleted file mode 100644
index 2ec51f339cec..000000000000
--- a/arch/s390/crypto/chacha-glue.c
+++ /dev/null
@@ -1,130 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * s390 ChaCha stream cipher.
- *
- * Copyright IBM Corp. 2021
- */
-
-#define KMSG_COMPONENT "chacha_s390"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <crypto/internal/chacha.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/algapi.h>
-#include <linux/cpufeature.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/fpu/api.h>
-#include "chacha-s390.h"
-
-static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
-				unsigned int nbytes, const u32 *key,
-				u32 *counter)
-{
-	struct kernel_fpu vxstate;
-
-	kernel_fpu_begin(&vxstate, KERNEL_VXR);
-	chacha20_vx(dst, src, nbytes, key, counter);
-	kernel_fpu_end(&vxstate, KERNEL_VXR);
-
-	*counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
-}
-
-static int chacha20_s390(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 state[CHACHA_STATE_WORDS] __aligned(16);
-	struct skcipher_walk walk;
-	unsigned int nbytes;
-	int rc;
-
-	rc = skcipher_walk_virt(&walk, req, false);
-	chacha_init_generic(state, ctx->key, req->iv);
-
-	while (walk.nbytes > 0) {
-		nbytes = walk.nbytes;
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		if (nbytes <= CHACHA_BLOCK_SIZE) {
-			chacha_crypt_generic(state, walk.dst.virt.addr,
-					     walk.src.virt.addr, nbytes,
-					     ctx->nrounds);
-		} else {
-			chacha20_crypt_s390(state, walk.dst.virt.addr,
-					    walk.src.virt.addr, nbytes,
-					    &state[4], &state[12]);
-		}
-		rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-	return rc;
-}
-
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
-{
-	/* TODO: implement hchacha_block_arch() in assembly */
-	hchacha_block_generic(state, stream, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
-{
-	chacha_init_generic(state, key, iv);
-}
-EXPORT_SYMBOL(chacha_init_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
-		       unsigned int bytes, int nrounds)
-{
-	/* s390 chacha20 implementation has 20 rounds hard-coded,
-	 * it cannot handle a block of data or less, but otherwise
-	 * it can handle data of arbitrary size
-	 */
-	if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20)
-		chacha_crypt_generic(state, dst, src, bytes, nrounds);
-	else
-		chacha20_crypt_s390(state, dst, src, bytes,
-				    &state[4], &state[12]);
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-static struct skcipher_alg chacha_algs[] = {
-	{
-		.base.cra_name		= "chacha20",
-		.base.cra_driver_name	= "chacha20-s390",
-		.base.cra_priority	= 900,
-		.base.cra_blocksize	= 1,
-		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
-		.base.cra_module	= THIS_MODULE,
-
-		.min_keysize		= CHACHA_KEY_SIZE,
-		.max_keysize		= CHACHA_KEY_SIZE,
-		.ivsize			= CHACHA_IV_SIZE,
-		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= chacha20_setkey,
-		.encrypt		= chacha20_s390,
-		.decrypt		= chacha20_s390,
-	}
-};
-
-static int __init chacha_mod_init(void)
-{
-	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
-		crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
-}
-
-static void __exit chacha_mod_fini(void)
-{
-	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
-		crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
-}
-
-module_cpu_feature_match(VXRS, chacha_mod_init);
-module_exit(chacha_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha20 stream cipher");
-MODULE_LICENSE("GPL v2");
-
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
deleted file mode 100644
index fafecad20752..000000000000
--- a/arch/s390/crypto/crc32-vx.c
+++ /dev/null
@@ -1,310 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Crypto-API module for CRC-32 algorithms implemented with the
- * z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT	"crc32-vx"
-#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <asm/fpu/api.h>
-
-
-#define CRC32_BLOCK_SIZE	1
-#define CRC32_DIGEST_SIZE	4
-
-#define VX_MIN_LEN		64
-#define VX_ALIGNMENT		16L
-#define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
-
-struct crc_ctx {
-	u32 key;
-};
-
-struct crc_desc_ctx {
-	u32 crc;
-};
-
-/* Prototypes for functions in assembly files */
-u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation. Depending
- * on the message buffer, the hardware-accelerated or software implementation
- * is used.   Note that the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- *
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw)		    \
-	static u32 __pure ___fname(u32 crc,				    \
-				unsigned char const *data, size_t datalen)  \
-	{								    \
-		struct kernel_fpu vxstate;				    \
-		unsigned long prealign, aligned, remaining;		    \
-									    \
-		if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)		    \
-			return ___crc32_sw(crc, data, datalen);		    \
-									    \
-		if ((unsigned long)data & VX_ALIGN_MASK) {		    \
-			prealign = VX_ALIGNMENT -			    \
-				  ((unsigned long)data & VX_ALIGN_MASK);    \
-			datalen -= prealign;				    \
-			crc = ___crc32_sw(crc, data, prealign);		    \
-			data = (void *)((unsigned long)data + prealign);    \
-		}							    \
-									    \
-		aligned = datalen & ~VX_ALIGN_MASK;			    \
-		remaining = datalen & VX_ALIGN_MASK;			    \
-									    \
-		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
-		crc = ___crc32_vx(crc, data, aligned);			    \
-		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
-									    \
-		if (remaining)						    \
-			crc = ___crc32_sw(crc, data + aligned, remaining);  \
-									    \
-		return crc;						    \
-	}
-
-DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
-DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
-DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
-
-
-static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
-{
-	struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
-	mctx->key = 0;
-	return 0;
-}
-
-static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
-{
-	struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
-	mctx->key = ~0;
-	return 0;
-}
-
-static int crc32_vx_init(struct shash_desc *desc)
-{
-	struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
-	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = mctx->key;
-	return 0;
-}
-
-static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
-			   unsigned int newkeylen)
-{
-	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
-	if (newkeylen != sizeof(mctx->key))
-		return -EINVAL;
-	mctx->key = le32_to_cpu(*(__le32 *)newkey);
-	return 0;
-}
-
-static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
-			     unsigned int newkeylen)
-{
-	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
-	if (newkeylen != sizeof(mctx->key))
-		return -EINVAL;
-	mctx->key = be32_to_cpu(*(__be32 *)newkey);
-	return 0;
-}
-
-static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
-{
-	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__le32 *)out = cpu_to_le32p(&ctx->crc);
-	return 0;
-}
-
-static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
-{
-	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__be32 *)out = cpu_to_be32p(&ctx->crc);
-	return 0;
-}
-
-static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
-{
-	struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	/*
-	 * Perform a final XOR with 0xFFFFFFFF to be in sync
-	 * with the generic crc32c shash implementation.
-	 */
-	*(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
-	return 0;
-}
-
-static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
-			      u8 *out)
-{
-	*(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
-	return 0;
-}
-
-static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
-			      u8 *out)
-{
-	*(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
-	return 0;
-}
-
-static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
-			     u8 *out)
-{
-	/*
-	 * Perform a final XOR with 0xFFFFFFFF to be in sync
-	 * with the generic crc32c shash implementation.
-	 */
-	*(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
-	return 0;
-}
-
-
-#define CRC32_VX_FINUP(alg, func)					      \
-	static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data,  \
-				   unsigned int datalen, u8 *out)	      \
-	{								      \
-		return __ ## alg ## _vx_finup(shash_desc_ctx(desc),	      \
-					      data, datalen, out);	      \
-	}
-
-CRC32_VX_FINUP(crc32le, crc32_le_vx)
-CRC32_VX_FINUP(crc32be, crc32_be_vx)
-CRC32_VX_FINUP(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_DIGEST(alg, func)					      \
-	static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
-				     unsigned int len, u8 *out)		      \
-	{								      \
-		return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm),    \
-					      data, len, out);		      \
-	}
-
-CRC32_VX_DIGEST(crc32le, crc32_le_vx)
-CRC32_VX_DIGEST(crc32be, crc32_be_vx)
-CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_UPDATE(alg, func)					      \
-	static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
-				     unsigned int datalen)		      \
-	{								      \
-		struct crc_desc_ctx *ctx = shash_desc_ctx(desc);	      \
-		ctx->crc = func(ctx->crc, data, datalen);		      \
-		return 0;						      \
-	}
-
-CRC32_VX_UPDATE(crc32le, crc32_le_vx)
-CRC32_VX_UPDATE(crc32be, crc32_be_vx)
-CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
-
-
-static struct shash_alg crc32_vx_algs[] = {
-	/* CRC-32 LE */
-	{
-		.init		=	crc32_vx_init,
-		.setkey		=	crc32_vx_setkey,
-		.update		=	crc32le_vx_update,
-		.final		=	crc32le_vx_final,
-		.finup		=	crc32le_vx_finup,
-		.digest		=	crc32le_vx_digest,
-		.descsize	=	sizeof(struct crc_desc_ctx),
-		.digestsize	=	CRC32_DIGEST_SIZE,
-		.base		=	{
-			.cra_name	 = "crc32",
-			.cra_driver_name = "crc32-vx",
-			.cra_priority	 = 200,
-			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
-			.cra_blocksize	 = CRC32_BLOCK_SIZE,
-			.cra_ctxsize	 = sizeof(struct crc_ctx),
-			.cra_module	 = THIS_MODULE,
-			.cra_init	 = crc32_vx_cra_init_zero,
-		},
-	},
-	/* CRC-32 BE */
-	{
-		.init		=	crc32_vx_init,
-		.setkey		=	crc32be_vx_setkey,
-		.update		=	crc32be_vx_update,
-		.final		=	crc32be_vx_final,
-		.finup		=	crc32be_vx_finup,
-		.digest		=	crc32be_vx_digest,
-		.descsize	=	sizeof(struct crc_desc_ctx),
-		.digestsize	=	CRC32_DIGEST_SIZE,
-		.base		=	{
-			.cra_name	 = "crc32be",
-			.cra_driver_name = "crc32be-vx",
-			.cra_priority	 = 200,
-			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
-			.cra_blocksize	 = CRC32_BLOCK_SIZE,
-			.cra_ctxsize	 = sizeof(struct crc_ctx),
-			.cra_module	 = THIS_MODULE,
-			.cra_init	 = crc32_vx_cra_init_zero,
-		},
-	},
-	/* CRC-32C LE */
-	{
-		.init		=	crc32_vx_init,
-		.setkey		=	crc32_vx_setkey,
-		.update		=	crc32c_vx_update,
-		.final		=	crc32c_vx_final,
-		.finup		=	crc32c_vx_finup,
-		.digest		=	crc32c_vx_digest,
-		.descsize	=	sizeof(struct crc_desc_ctx),
-		.digestsize	=	CRC32_DIGEST_SIZE,
-		.base		=	{
-			.cra_name	 = "crc32c",
-			.cra_driver_name = "crc32c-vx",
-			.cra_priority	 = 200,
-			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
-			.cra_blocksize	 = CRC32_BLOCK_SIZE,
-			.cra_ctxsize	 = sizeof(struct crc_ctx),
-			.cra_module	 = THIS_MODULE,
-			.cra_init	 = crc32_vx_cra_init_invert,
-		},
-	},
-};
-
-
-static int __init crc_vx_mod_init(void)
-{
-	return crypto_register_shashes(crc32_vx_algs,
-				       ARRAY_SIZE(crc32_vx_algs));
-}
-
-static void __exit crc_vx_mod_exit(void)
-{
-	crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
-}
-
-module_cpu_feature_match(VXRS, crc_vx_mod_init);
-module_exit(crc_vx_mod_exit);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-vx");
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-vx");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index e013088b5115..8e75b83a5ddc 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -492,7 +492,7 @@ out_err:
 	return ret;
 }
 
-module_cpu_feature_match(MSA, des_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, des_s390_init);
 module_exit(des_s390_exit);
 
 MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 6b07a2f1ce8a..dcbcee37cb63 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -8,29 +8,28 @@
  * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
 
+#include <asm/cpacf.h>
+#include <crypto/ghash.h>
 #include <crypto/internal/hash.h>
-#include <linux/module.h>
 #include <linux/cpufeature.h>
-#include <asm/cpacf.h>
-
-#define GHASH_BLOCK_SIZE	16
-#define GHASH_DIGEST_SIZE	16
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
 
-struct ghash_ctx {
+struct s390_ghash_ctx {
 	u8 key[GHASH_BLOCK_SIZE];
 };
 
-struct ghash_desc_ctx {
+struct s390_ghash_desc_ctx {
 	u8 icv[GHASH_BLOCK_SIZE];
 	u8 key[GHASH_BLOCK_SIZE];
-	u8 buffer[GHASH_BLOCK_SIZE];
-	u32 bytes;
 };
 
 static int ghash_init(struct shash_desc *desc)
 {
-	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
 	memset(dctx, 0, sizeof(*dctx));
 	memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
@@ -41,7 +40,7 @@ static int ghash_init(struct shash_desc *desc)
 static int ghash_setkey(struct crypto_shash *tfm,
 			const u8 *key, unsigned int keylen)
 {
-	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+	struct s390_ghash_ctx *ctx = crypto_shash_ctx(tfm);
 
 	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
@@ -54,80 +53,71 @@ static int ghash_setkey(struct crypto_shash *tfm,
 static int ghash_update(struct shash_desc *desc,
 			 const u8 *src, unsigned int srclen)
 {
-	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 	unsigned int n;
-	u8 *buf = dctx->buffer;
-
-	if (dctx->bytes) {
-		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
 
-		n = min(srclen, dctx->bytes);
-		dctx->bytes -= n;
-		srclen -= n;
-
-		memcpy(pos, src, n);
-		src += n;
+	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+	cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
+	return srclen - n;
+}
 
-		if (!dctx->bytes) {
-			cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
-				   GHASH_BLOCK_SIZE);
-		}
-	}
+static void ghash_flush(struct s390_ghash_desc_ctx *dctx, const u8 *src,
+			unsigned int len)
+{
+	if (len) {
+		u8 buf[GHASH_BLOCK_SIZE] = {};
 
-	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
-	if (n) {
-		cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
-		src += n;
-		srclen -= n;
+		memcpy(buf, src, len);
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+		memzero_explicit(buf, sizeof(buf));
 	}
+}
 
-	if (srclen) {
-		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
-		memcpy(buf, src, srclen);
-	}
+static int ghash_finup(struct shash_desc *desc, const u8 *src,
+		       unsigned int len, u8 *dst)
+{
+	struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
+	ghash_flush(dctx, src, len);
+	memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
 	return 0;
 }
 
-static int ghash_flush(struct ghash_desc_ctx *dctx)
+static int ghash_export(struct shash_desc *desc, void *out)
 {
-	u8 *buf = dctx->buffer;
-
-	if (dctx->bytes) {
-		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
-
-		memset(pos, 0, dctx->bytes);
-		cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
-		dctx->bytes = 0;
-	}
+	struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
+	memcpy(out, dctx->icv, GHASH_DIGEST_SIZE);
 	return 0;
 }
 
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_import(struct shash_desc *desc, const void *in)
 {
-	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-	int ret;
+	struct s390_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	struct s390_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	ret = ghash_flush(dctx);
-	if (!ret)
-		memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
-	return ret;
+	memcpy(dctx->icv, in, GHASH_DIGEST_SIZE);
+	memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
+	return 0;
 }
 
 static struct shash_alg ghash_alg = {
 	.digestsize	= GHASH_DIGEST_SIZE,
 	.init		= ghash_init,
 	.update		= ghash_update,
-	.final		= ghash_final,
+	.finup		= ghash_finup,
 	.setkey		= ghash_setkey,
-	.descsize	= sizeof(struct ghash_desc_ctx),
+	.export		= ghash_export,
+	.import		= ghash_import,
+	.statesize	= sizeof(struct ghash_desc_ctx),
+	.descsize	= sizeof(struct s390_ghash_desc_ctx),
 	.base		= {
 		.cra_name		= "ghash",
 		.cra_driver_name	= "ghash-s390",
 		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY,
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct ghash_ctx),
+		.cra_ctxsize		= sizeof(struct s390_ghash_ctx),
 		.cra_module		= THIS_MODULE,
 	},
 };
@@ -145,7 +135,7 @@ static void __exit ghash_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_cpu_feature_match(MSA, ghash_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, ghash_mod_init);
 module_exit(ghash_mod_exit);
 
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c
new file mode 100644
index 000000000000..93a1098d9f8d
--- /dev/null
+++ b/arch/s390/crypto/hmac_s390.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * s390 specific HMAC support.
+ */
+
+#define KMSG_COMPONENT	"hmac_s390"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <asm/cpacf.h>
+#include <crypto/internal/hash.h>
+#include <crypto/hmac.h>
+#include <crypto/sha2.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+/*
+ * KMAC param block layout for sha2 function codes:
+ * The layout of the param block for the KMAC instruction depends on the
+ * blocksize of the used hashing sha2-algorithm function codes. The param block
+ * contains the hash chaining value (cv), the input message bit-length (imbl)
+ * and the hmac-secret (key). To prevent code duplication, the sizes of all
+ * these are calculated based on the blocksize.
+ *
+ * param-block:
+ * +-------+
+ * | cv    |
+ * +-------+
+ * | imbl  |
+ * +-------+
+ * | key   |
+ * +-------+
+ *
+ * sizes:
+ * part | sha2    | calculation | size | type
+ * -----+---------+-------------+------+--------
+ * cv	| 224/256 | blocksize/2 |   32 |  u32[8]
+ *	| 384/512 |		|   64 |  u64[8]
+ * imbl | 224/256 | blocksize/8 |    8 |     u64
+ *	| 384/512 |		|   16 |    u128
+ * key	| 224/256 | blocksize	|   64 |  u8[64]
+ *	| 384/512 |		|  128 | u8[128]
+ */
+
+/* Upper bounds over all supported sha2 variants, used to size buffers */
+#define MAX_DIGEST_SIZE		SHA512_DIGEST_SIZE
+#define MAX_IMBL_SIZE		sizeof(u128)
+#define MAX_BLOCK_SIZE		SHA512_BLOCK_SIZE
+
+/* Sizes and offsets of the param block parts, derived from the blocksize */
+#define SHA2_CV_SIZE(bs)	((bs) >> 1)
+#define SHA2_IMBL_SIZE(bs)	((bs) >> 3)
+
+#define SHA2_IMBL_OFFSET(bs)	(SHA2_CV_SIZE(bs))
+#define SHA2_KEY_OFFSET(bs)	(SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs))
+
+/* Per-tfm context: the hmac key, zero padded up to the maximum blocksize */
+struct s390_hmac_ctx {
+	u8 key[MAX_BLOCK_SIZE];
+};
+
+/*
+ * Function code and flag bits handed to _cpacf_kmac() by reference.
+ * iimp is set for all update calls and cleared for the last call of a
+ * message. ikp is only ever set by s390_hmac_import() in this file;
+ * presumably _cpacf_kmac() updates it as well - confirm in asm/cpacf.h.
+ */
+union s390_kmac_gr0 {
+	unsigned long reg;
+	struct {
+		unsigned long		: 48;
+		unsigned long ikp	:  1;
+		unsigned long iimp	:  1;
+		unsigned long ccup	:  1;
+		unsigned long		:  6;
+		unsigned long fc	:  7;
+	};
+};
+
+/*
+ * Per-request context.
+ * param:  KMAC param block (cv, imbl, key), sized for the largest variant
+ * gr0:    function code and flags for _cpacf_kmac()
+ * buflen: 128 bit count of the message bytes fed so far,
+ *         [0] holds the low and [1] the high 64 bits
+ */
+struct s390_kmac_sha2_ctx {
+	u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + MAX_BLOCK_SIZE];
+	union s390_kmac_gr0 gr0;
+	u64 buflen[2];
+};
+
+/*
+ * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize
+ *
+ * The byte count buflen_hi:buflen_lo is converted to bits and stored at the
+ * imbl position of param. buflen_hi is only used with the sha512 blocksize.
+ * Any blocksize other than the sha256 or sha512 one leaves param untouched.
+ */
+static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo,
+				      u64 buflen_hi, unsigned int blocksize)
+{
+	u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize);
+
+	switch (blocksize) {
+	case SHA256_BLOCK_SIZE:
+		*(u64 *)imbl = buflen_lo * BITS_PER_BYTE;
+		break;
+	case SHA512_BLOCK_SIZE:
+		*(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * hash_data - hash one buffer with plain sha2, starting from the initial
+ * hash values of the variant selected by digestsize
+ *
+ * final == true:  the final digest is computed with KLMD and digestsize
+ *		   bytes are copied to digest.
+ * final == false: only the intermediate chaining value is computed with
+ *		   KIMD and the full sha256 resp. sha512 sized state is
+ *		   copied to digest, also for sha224 and sha384. The only
+ *		   such caller passes exactly one block of data.
+ *
+ * Returns 0 on success and -EINVAL for an unsupported digestsize.
+ */
+static int hash_data(const u8 *in, unsigned int inlen,
+		     u8 *digest, unsigned int digestsize, bool final)
+{
+	unsigned long func;
+	union {
+		struct sha256_paramblock {
+			u32 h[8];
+			u64 mbl;
+		} sha256;
+		struct sha512_paramblock {
+			u64 h[8];
+			u128 mbl;
+		} sha512;
+	} __packed param;
+
+#define PARAM_INIT(x, y, z)		   \
+	param.sha##x.h[0] = SHA##y ## _H0; \
+	param.sha##x.h[1] = SHA##y ## _H1; \
+	param.sha##x.h[2] = SHA##y ## _H2; \
+	param.sha##x.h[3] = SHA##y ## _H3; \
+	param.sha##x.h[4] = SHA##y ## _H4; \
+	param.sha##x.h[5] = SHA##y ## _H5; \
+	param.sha##x.h[6] = SHA##y ## _H6; \
+	param.sha##x.h[7] = SHA##y ## _H7; \
+	param.sha##x.mbl = (z)
+
+	switch (digestsize) {
+	case SHA224_DIGEST_SIZE:
+		func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
+		PARAM_INIT(256, 224, inlen * 8);
+		if (!final)
+			digestsize = SHA256_DIGEST_SIZE;
+		break;
+	case SHA256_DIGEST_SIZE:
+		func = final ? CPACF_KLMD_SHA_256 : CPACF_KIMD_SHA_256;
+		PARAM_INIT(256, 256, inlen * 8);
+		break;
+	case SHA384_DIGEST_SIZE:
+		func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
+		PARAM_INIT(512, 384, inlen * 8);
+		if (!final)
+			digestsize = SHA512_DIGEST_SIZE;
+		break;
+	case SHA512_DIGEST_SIZE:
+		func = final ? CPACF_KLMD_SHA_512 : CPACF_KIMD_SHA_512;
+		PARAM_INIT(512, 512, inlen * 8);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+#undef PARAM_INIT
+
+	/* a KIMD function code has to be run by KIMD, not by KLMD */
+	if (final)
+		cpacf_klmd(func, &param, in, inlen);
+	else
+		cpacf_kimd(func, &param, in, inlen);
+
+	memcpy(digest, &param, digestsize);
+
+	return 0;
+}
+
+/* hash_key - compute the final sha2 digest of an hmac key */
+static int hash_key(const u8 *in, unsigned int inlen,
+		    u8 *digest, unsigned int digestsize)
+{
+	return hash_data(in, inlen, digest, digestsize, true);
+}
+
+/*
+ * Store the hmac key in the tfm context. A key longer than the blocksize
+ * is replaced by its digest, the rest of the key buffer is zero.
+ */
+static int s390_hmac_sha2_setkey(struct crypto_shash *tfm,
+				 const u8 *key, unsigned int keylen)
+{
+	struct s390_hmac_ctx *tfm_ctx = crypto_shash_ctx(tfm);
+	unsigned int ds = crypto_shash_digestsize(tfm);
+	unsigned int bs = crypto_shash_blocksize(tfm);
+
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	if (keylen > bs)
+		return hash_key(key, keylen, tfm_ctx->key, ds);
+
+	memcpy(tfm_ctx->key, key, keylen);
+	return 0;
+}
+
+/*
+ * Start a new hmac computation: copy the key into the param block, reset
+ * the byte counter and gr0 and select the function code by digestsize.
+ * Returns -EINVAL for an unsupported digestsize.
+ */
+static int s390_hmac_sha2_init(struct shash_desc *desc)
+{
+	struct s390_hmac_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm);
+	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bs = crypto_shash_blocksize(desc->tfm);
+
+	memcpy(ctx->param + SHA2_KEY_OFFSET(bs),
+	       tfm_ctx->key, bs);
+
+	ctx->buflen[0] = 0;
+	ctx->buflen[1] = 0;
+	ctx->gr0.reg = 0;
+	switch (crypto_shash_digestsize(desc->tfm)) {
+	case SHA224_DIGEST_SIZE:
+		ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_224;
+		break;
+	case SHA256_DIGEST_SIZE:
+		ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_256;
+		break;
+	case SHA384_DIGEST_SIZE:
+		ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_384;
+		break;
+	case SHA512_DIGEST_SIZE:
+		ctx->gr0.fc = CPACF_KMAC_HMAC_SHA_512;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Process as many whole blocks of data as possible and return the number
+ * of trailing bytes which were not consumed.
+ */
+static int s390_hmac_sha2_update(struct shash_desc *desc,
+				 const u8 *data, unsigned int len)
+{
+	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bs = crypto_shash_blocksize(desc->tfm);
+	unsigned int n = round_down(len, bs);
+
+	ctx->buflen[0] += n;
+	if (ctx->buflen[0] < n)
+		ctx->buflen[1]++;
+
+	/* process as many blocks as possible */
+	ctx->gr0.iimp = 1;
+	_cpacf_kmac(&ctx->gr0.reg, ctx->param, data, n);
+	return len - n;
+}
+
+/*
+ * Process the last len bytes of the message together with the total message
+ * bit-length and copy digestsize bytes of the result to out.
+ */
+static int s390_hmac_sha2_finup(struct shash_desc *desc, const u8 *src,
+				unsigned int len, u8 *out)
+{
+	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bs = crypto_shash_blocksize(desc->tfm);
+
+	ctx->buflen[0] += len;
+	if (ctx->buflen[0] < len)
+		ctx->buflen[1]++;
+
+	ctx->gr0.iimp = 0;
+	kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs);
+	_cpacf_kmac(&ctx->gr0.reg, ctx->param, src, len);
+	memcpy(out, ctx->param, crypto_shash_digestsize(desc->tfm));
+
+	return 0;
+}
+
+/* One-shot variant: init, then process the whole message with one call */
+static int s390_hmac_sha2_digest(struct shash_desc *desc,
+				 const u8 *data, unsigned int len, u8 *out)
+{
+	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int ds = crypto_shash_digestsize(desc->tfm);
+	int rc;
+
+	rc = s390_hmac_sha2_init(desc);
+	if (rc)
+		return rc;
+
+	ctx->gr0.iimp = 0;
+	kmac_sha2_set_imbl(ctx->param, len, 0,
+			   crypto_shash_blocksize(desc->tfm));
+	_cpacf_kmac(&ctx->gr0.reg, ctx->param, data, len);
+	memcpy(out, ctx->param, ds);
+
+	return 0;
+}
+
+/*
+ * Write the state of a request without data to out: the intermediate sha2
+ * state after hashing the ipad block, that is the key xor HMAC_IPAD_VALUE.
+ */
+static int s390_hmac_export_zero(struct shash_desc *desc, void *out)
+{
+	struct crypto_shash *tfm = desc->tfm;
+	u8 ipad[SHA512_BLOCK_SIZE];
+	struct s390_hmac_ctx *ctx;
+	unsigned int bs;
+	int err, i;
+
+	ctx = crypto_shash_ctx(tfm);
+	bs = crypto_shash_blocksize(tfm);
+	for (i = 0; i < bs; i++)
+		ipad[i] = ctx->key[i] ^ HMAC_IPAD_VALUE;
+
+	err = hash_data(ipad, bs, out, crypto_shash_digestsize(tfm), false);
+	memzero_explicit(ipad, sizeof(ipad));
+	return err;
+}
+
+/*
+ * Export layout: blocksize/2 bytes of chaining value followed by the byte
+ * counter, 64 bit wide or, with the sha512 blocksize, 128 bit wide as low
+ * and high u64.
+ *
+ * NOTE(review): the counter holds message bytes only, while the state
+ * written by s390_hmac_export_zero() already covers the ipad block. Confirm
+ * that this is what users of the exported state expect.
+ */
+static int s390_hmac_export(struct shash_desc *desc, void *out)
+{
+	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bs = crypto_shash_blocksize(desc->tfm);
+	unsigned int ds = bs / 2;
+	union {
+		u8 *u8;
+		u64 *u64;
+	} p = { .u8 = out };
+	int err = 0;
+
+	if (!ctx->gr0.ikp)
+		err = s390_hmac_export_zero(desc, out);
+	else
+		memcpy(p.u8, ctx->param, ds);
+	p.u8 += ds;
+	put_unaligned(ctx->buflen[0], p.u64++);
+	if (ds == SHA512_DIGEST_SIZE)
+		put_unaligned(ctx->buflen[1], p.u64);
+	return err;
+}
+
+/*
+ * Counterpart of s390_hmac_export(): run init, then restore the chaining
+ * value and the byte counter. ikp is set if the restored counter is not zero.
+ */
+static int s390_hmac_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bs = crypto_shash_blocksize(desc->tfm);
+	unsigned int ds = bs / 2;
+	union {
+		const u8 *u8;
+		const u64 *u64;
+	} p = { .u8 = in };
+	int err;
+
+	err = s390_hmac_sha2_init(desc);
+	memcpy(ctx->param, p.u8, ds);
+	p.u8 += ds;
+	ctx->buflen[0] = get_unaligned(p.u64++);
+	if (ds == SHA512_DIGEST_SIZE)
+		ctx->buflen[1] = get_unaligned(p.u64);
+	if (ctx->buflen[0] | ctx->buflen[1])
+		ctx->gr0.ikp = 1;
+	return err;
+}
+
+/*
+ * S390_HMAC_SHA2_ALG - initializer for one entry of s390_hmac_algs
+ * x:  sha2 variant, one of 224, 256, 384 or 512
+ * ss: value for the statesize of the algorithm
+ */
+#define S390_HMAC_SHA2_ALG(x, ss) {					\
+	.fc = CPACF_KMAC_HMAC_SHA_##x,					\
+	.alg = {							\
+		.init = s390_hmac_sha2_init,				\
+		.update = s390_hmac_sha2_update,			\
+		.finup = s390_hmac_sha2_finup,				\
+		.digest = s390_hmac_sha2_digest,			\
+		.setkey = s390_hmac_sha2_setkey,			\
+		.export = s390_hmac_export,				\
+		.import = s390_hmac_import,				\
+		.descsize = sizeof(struct s390_kmac_sha2_ctx),		\
+		.halg = {						\
+			.statesize = ss,				\
+			.digestsize = SHA##x##_DIGEST_SIZE,		\
+			.base = {					\
+				.cra_name = "hmac(sha" #x ")",		\
+				.cra_driver_name = "hmac_s390_sha" #x,	\
+				.cra_blocksize = SHA##x##_BLOCK_SIZE,	\
+				.cra_priority = 400,			\
+				.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | \
+					     CRYPTO_AHASH_ALG_FINUP_MAX, \
+				.cra_ctxsize = sizeof(struct s390_hmac_ctx), \
+				.cra_module = THIS_MODULE,		\
+			},						\
+		},							\
+	},								\
+}
+
+/*
+ * The hmac variants of this driver. fc is the KMAC function code a variant
+ * depends on, registered is set once the variant has been registered.
+ */
+static struct s390_hmac_alg {
+	bool registered;
+	unsigned int fc;
+	struct shash_alg alg;
+} s390_hmac_algs[] = {
+	S390_HMAC_SHA2_ALG(224, sizeof(struct crypto_sha256_state)),
+	S390_HMAC_SHA2_ALG(256, sizeof(struct crypto_sha256_state)),
+	S390_HMAC_SHA2_ALG(384, SHA512_STATE_SIZE),
+	S390_HMAC_SHA2_ALG(512, SHA512_STATE_SIZE),
+};
+
+/* Unregister all registered variants in reverse order */
+static __always_inline void _s390_hmac_algs_unregister(void)
+{
+	struct s390_hmac_alg *hmac;
+	int i;
+
+	for (i = ARRAY_SIZE(s390_hmac_algs) - 1; i >= 0; i--) {
+		hmac = &s390_hmac_algs[i];
+		if (!hmac->registered)
+			continue;
+		crypto_unregister_shash(&hmac->alg);
+	}
+}
+
+/*
+ * Register every variant whose KMAC function code is available. Fails with
+ * -ENODEV if a needed KIMD or KLMD sha2 function is missing or if no variant
+ * is available at all. A registration error unregisters the variants again.
+ */
+static int __init hmac_s390_init(void)
+{
+	struct s390_hmac_alg *hmac;
+	int i, rc = -ENODEV;
+
+	if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256))
+		return -ENODEV;
+	if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512))
+		return -ENODEV;
+	/* hash_data() runs KIMD for the export of a request without data */
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
+		return -ENODEV;
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
+		return -ENODEV;
+
+	for (i = 0; i < ARRAY_SIZE(s390_hmac_algs); i++) {
+		hmac = &s390_hmac_algs[i];
+		if (!cpacf_query_func(CPACF_KMAC, hmac->fc))
+			continue;
+
+		rc = crypto_register_shash(&hmac->alg);
+		if (rc) {
+			pr_err("unable to register %s\n",
+			       hmac->alg.halg.base.cra_name);
+			goto out;
+		}
+		hmac->registered = true;
+		pr_debug("registered %s\n", hmac->alg.halg.base.cra_name);
+	}
+	return rc;
+out:
+	_s390_hmac_algs_unregister();
+	return rc;
+}
+
+static void __exit hmac_s390_exit(void)
+{
+	_s390_hmac_algs_unregister();
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, hmac_s390_init);
+module_exit(hmac_s390_exit);
+
+MODULE_DESCRIPTION("S390 HMAC driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index a279b7d23a5e..8a340c16acb4 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
  * s390 implementation of the AES Cipher Algorithm with protected keys.
  *
  * s390 Version:
- *   Copyright IBM Corp. 2017,2020
+ *   Copyright IBM Corp. 2017, 2025
  *   Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *		Harald Freudenberger <freude@de.ibm.com>
  */
@@ -13,16 +13,18 @@
 #define KMSG_COMPONENT "paes_s390"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/module.h>
+#include <linux/atomic.h>
 #include <linux/cpufeature.h>
+#include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/delay.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/engine.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <asm/cpacf.h>
@@ -34,214 +36,464 @@
  * is called. As paes can handle different kinds of key blobs
  * and padding is also possible, the limits need to be generous.
  */
-#define PAES_MIN_KEYSIZE 16
-#define PAES_MAX_KEYSIZE 320
+#define PAES_MIN_KEYSIZE	16
+#define PAES_MAX_KEYSIZE	MAXEP11AESKEYBLOBSIZE
+#define PAES_256_PROTKEY_SIZE	(32 + 32)	/* key + verification pattern */
+#define PXTS_256_PROTKEY_SIZE	(32 + 32 + 32)	/* k1 + k2 + verification pattern */
 
 static u8 *ctrblk;
 static DEFINE_MUTEX(ctrblk_lock);
 
 static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
 
-struct key_blob {
-	/*
-	 * Small keys will be stored in the keybuf. Larger keys are
-	 * stored in extra allocated memory. In both cases does
-	 * key point to the memory where the key is stored.
-	 * The code distinguishes by checking keylen against
-	 * sizeof(keybuf). See the two following helper functions.
-	 */
-	u8 *key;
-	u8 keybuf[128];
+static struct crypto_engine *paes_crypto_engine;
+#define MAX_QLEN 10
+
+/*
+ * protected key specific stuff
+ */
+
+struct paes_protkey {
+	u32 type;
+	u32 len;
+	u8 protkey[PXTS_256_PROTKEY_SIZE];
+};
+
+#define PK_STATE_NO_KEY		     0
+#define PK_STATE_CONVERT_IN_PROGRESS 1
+#define PK_STATE_VALID		     2
+
+struct s390_paes_ctx {
+	/* source key material the protected key is derived from */
+	u8 keybuf[PAES_MAX_KEYSIZE];
+	unsigned int keylen;
+
+	/* cpacf function code to use with this protected key type */
+	long fc;
+
+	/* nr of requests enqueued via crypto engine which use this tfm ctx */
+	atomic_t via_engine_ctr;
+
+	/* spinlock to atomically read/update all the following fields */
+	spinlock_t pk_lock;
+
+	/* see PK_STATE* defines above, a value < 0 holds the convert failure rc */
+	int pk_state;
+	/* if state is valid, pk holds the protected key */
+	struct paes_protkey pk;
+};
+
+struct s390_pxts_ctx {
+	/* source key material the protected key(s) are derived from */
+	u8 keybuf[2 * PAES_MAX_KEYSIZE];
 	unsigned int keylen;
+
+	/* cpacf function code to use with this protected key type */
+	long fc;
+
+	/* nr of requests enqueued via crypto engine which use this tfm ctx */
+	atomic_t via_engine_ctr;
+
+	/* spinlock to atomically read/update all the following fields */
+	spinlock_t pk_lock;
+
+	/* see PK_STATE* defines above, a value < 0 holds the convert failure rc */
+	int pk_state;
+	/* if state is valid, pk[] hold(s) the protected key(s) */
+	struct paes_protkey pk[2];
 };
 
-static inline int _key_to_kb(struct key_blob *kb,
-			     const u8 *key,
-			     unsigned int keylen)
+/*
+ * make_clrkey_token() - wrap the raw key ck with pkey clearkey token
+ * information.
+ * @returns the size of the clearkey token
+ */
+static inline u32 make_clrkey_token(const u8 *ck, size_t cklen, u8 *dest)
 {
-	struct clearkey_header {
+	struct clrkey_token {
 		u8  type;
 		u8  res0[3];
 		u8  version;
 		u8  res1[3];
 		u32 keytype;
 		u32 len;
-	} __packed * h;
+		u8 key[];
+	} __packed *token = (struct clrkey_token *)dest;
+
+	token->type = 0x00;
+	token->version = 0x02;
+	token->keytype = (cklen - 8) >> 3;
+	token->len = cklen;
+	memcpy(token->key, ck, cklen);
+
+	return sizeof(*token) + cklen;
+}
+
+/*
+ * paes_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int paes_ctx_setkey(struct s390_paes_ctx *ctx,
+				  const u8 *key, unsigned int keylen)
+{
+	if (keylen > sizeof(ctx->keybuf))
+		return -EINVAL;
 
 	switch (keylen) {
 	case 16:
 	case 24:
 	case 32:
 		/* clear key value, prepare pkey clear key token in keybuf */
-		memset(kb->keybuf, 0, sizeof(kb->keybuf));
-		h = (struct clearkey_header *) kb->keybuf;
-		h->version = 0x02; /* TOKVER_CLEAR_KEY */
-		h->keytype = (keylen - 8) >> 3;
-		h->len = keylen;
-		memcpy(kb->keybuf + sizeof(*h), key, keylen);
-		kb->keylen = sizeof(*h) + keylen;
-		kb->key = kb->keybuf;
+		memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+		ctx->keylen = make_clrkey_token(key, keylen, ctx->keybuf);
 		break;
 	default:
 		/* other key material, let pkey handle this */
-		if (keylen <= sizeof(kb->keybuf))
-			kb->key = kb->keybuf;
-		else {
-			kb->key = kmalloc(keylen, GFP_KERNEL);
-			if (!kb->key)
-				return -ENOMEM;
-		}
-		memcpy(kb->key, key, keylen);
-		kb->keylen = keylen;
+		memcpy(ctx->keybuf, key, keylen);
+		ctx->keylen = keylen;
 		break;
 	}
 
 	return 0;
 }
 
-static inline void _free_kb_keybuf(struct key_blob *kb)
+/*
+ * pxts_ctx_setkey() - Set key value into context, maybe construct
+ * a clear key token digestible by pkey from a clear key value.
+ */
+static inline int pxts_ctx_setkey(struct s390_pxts_ctx *ctx,
+				  const u8 *key, unsigned int keylen)
 {
-	if (kb->key && kb->key != kb->keybuf
-	    && kb->keylen > sizeof(kb->keybuf)) {
-		kfree(kb->key);
-		kb->key = NULL;
-	}
-}
+	size_t cklen = keylen / 2;
 
-struct s390_paes_ctx {
-	struct key_blob kb;
-	struct pkey_protkey pk;
-	spinlock_t pk_lock;
-	unsigned long fc;
-};
+	if (keylen > sizeof(ctx->keybuf))
+		return -EINVAL;
 
-struct s390_pxts_ctx {
-	struct key_blob kb[2];
-	struct pkey_protkey pk[2];
-	spinlock_t pk_lock;
-	unsigned long fc;
-};
+	switch (keylen) {
+	case 32:
+	case 64:
+		/* clear key value, prepare pkey clear key tokens in keybuf */
+		memset(ctx->keybuf, 0, sizeof(ctx->keybuf));
+		ctx->keylen = make_clrkey_token(key, cklen, ctx->keybuf);
+		ctx->keylen += make_clrkey_token(key + cklen, cklen,
+						 ctx->keybuf + ctx->keylen);
+		break;
+	default:
+		/* other key material, let pkey handle this */
+		memcpy(ctx->keybuf, key, keylen);
+		ctx->keylen = keylen;
+		break;
+	}
+
+	return 0;
+}
 
-static inline int __paes_keyblob2pkey(struct key_blob *kb,
-				     struct pkey_protkey *pk)
+/*
+ * Convert the raw key material into a protected key via PKEY api.
+ * This function may sleep - don't call in non-sleeping context.
+ */
+static inline int convert_key(const u8 *key, unsigned int keylen,
+			      struct paes_protkey *pk)
 {
-	int i, ret;
+	int rc, i;
 
-	/* try three times in case of failure */
-	for (i = 0; i < 3; i++) {
-		if (i > 0 && ret == -EAGAIN && in_task())
-			if (msleep_interruptible(1000))
-				return -EINTR;
-		ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk);
-		if (ret == 0)
-			break;
+	pk->len = sizeof(pk->protkey);
+
+	/*
+	 * In case of a busy card retry with increasing delay
+	 * of 200, 400, 800 and 1600 ms - in total 3 s.
+	 */
+	for (rc = -EIO, i = 0; rc && i < 5; i++) {
+		if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) {
+			rc = -EINTR;
+			goto out;
+		}
+		rc = pkey_key2protkey(key, keylen,
+				      pk->protkey, &pk->len, &pk->type,
+				      PKEY_XFLAG_NOMEMALLOC);
 	}
 
-	return ret;
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+/*
+ * (Re-)Convert the raw key material from the ctx into a protected key
+ * via convert_key() function. Update the pk_state, pk_type, pk_len
+ * and the protected key in the tfm context.
+ * Please note this function may be invoked concurrently with the very
+ * same tfm context. The pk_lock spinlock in the context ensures an
+ * atomic update of the pk and the pk state but does not guarantee any
+ * order of update. So a fresh converted valid protected key may get
+ * updated with an 'old' expired key value. As the cpacf instructions
+ * detect this, refuse to operate with an invalid key and the calling
+ * code triggers a (re-)conversion this does no harm. This may lead to
+ * unnecessary additional conversion but never to invalid data on en-
+ * or decrypt operations.
+ */
+static int paes_convert_key(struct s390_paes_ctx *ctx)
 {
-	int ret;
-	struct pkey_protkey pkey;
+	struct paes_protkey pk;
+	int rc;
+
+	spin_lock_bh(&ctx->pk_lock);
+	ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+	spin_unlock_bh(&ctx->pk_lock);
 
-	ret = __paes_keyblob2pkey(&ctx->kb, &pkey);
-	if (ret)
-		return ret;
+	rc = convert_key(ctx->keybuf, ctx->keylen, &pk);
 
+	/* update context */
 	spin_lock_bh(&ctx->pk_lock);
-	memcpy(&ctx->pk, &pkey, sizeof(pkey));
+	if (rc) {
+		ctx->pk_state = rc;
+	} else {
+		ctx->pk_state = PK_STATE_VALID;
+		ctx->pk = pk;
+	}
 	spin_unlock_bh(&ctx->pk_lock);
 
-	return 0;
+	memzero_explicit(&pk, sizeof(pk));
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int ecb_paes_init(struct crypto_skcipher *tfm)
+/*
+ * (Re-)Convert the raw xts key material from the ctx into a
+ * protected key via convert_key() function. Update the pk_state,
+ * pk_type, pk_len and the protected key in the tfm context.
+ * See also comments on function paes_convert_key.
+ */
+static int pxts_convert_key(struct s390_pxts_ctx *ctx)
 {
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct paes_protkey pk0, pk1;
+	size_t split_keylen;
+	int rc;
 
-	ctx->kb.key = NULL;
-	spin_lock_init(&ctx->pk_lock);
+	spin_lock_bh(&ctx->pk_lock);
+	ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS;
+	spin_unlock_bh(&ctx->pk_lock);
 
-	return 0;
-}
+	rc = convert_key(ctx->keybuf, ctx->keylen, &pk0);
+	if (rc)
+		goto out;
+
+	switch (pk0.type) {
+	case PKEY_KEYTYPE_AES_128:
+	case PKEY_KEYTYPE_AES_256:
+		/* second keytoken required */
+		if (ctx->keylen % 2) {
+			rc = -EINVAL;
+			goto out;
+		}
+		split_keylen = ctx->keylen / 2;
+		rc = convert_key(ctx->keybuf + split_keylen,
+				 split_keylen, &pk1);
+		if (rc)
+			goto out;
+		if (pk0.type != pk1.type) {
+			rc = -EINVAL;
+			goto out;
+		}
+		break;
+	case PKEY_KEYTYPE_AES_XTS_128:
+	case PKEY_KEYTYPE_AES_XTS_256:
+		/* single key */
+		pk1.type = 0;
+		break;
+	default:
+		/* unsupported protected keytype */
+		rc = -EINVAL;
+		goto out;
+	}
 
-static void ecb_paes_exit(struct crypto_skcipher *tfm)
-{
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+out:
+	/* update context */
+	spin_lock_bh(&ctx->pk_lock);
+	if (rc) {
+		ctx->pk_state = rc;
+	} else {
+		ctx->pk_state = PK_STATE_VALID;
+		ctx->pk[0] = pk0;
+		ctx->pk[1] = pk1;
+	}
+	spin_unlock_bh(&ctx->pk_lock);
 
-	_free_kb_keybuf(&ctx->kb);
+	memzero_explicit(&pk0, sizeof(pk0));
+	memzero_explicit(&pk1, sizeof(pk1));
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
+/*
+ * PAES ECB implementation
+ */
+
+struct ecb_param {
+	u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pecb_req_ctx {
+	unsigned long modifier;
+	struct skcipher_walk walk;
+	bool param_init_done;
+	struct ecb_param param;
+};
+
+static int ecb_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
 {
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	long fc;
 	int rc;
-	unsigned long fc;
 
-	rc = __paes_convert_key(ctx);
+	/* set raw key into context */
+	rc = paes_ctx_setkey(ctx, in_key, key_len);
 	if (rc)
-		return rc;
+		goto out;
 
-	/* Pick the correct function code based on the protected key type */
-	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 :
-		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 :
-		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0;
+	/* convert key into protected key */
+	rc = paes_convert_key(ctx);
+	if (rc)
+		goto out;
 
-	/* Check if the function code is available */
+	/* Pick the correct function code based on the protected key type */
+	switch (ctx->pk.type) {
+	case PKEY_KEYTYPE_AES_128:
+		fc = CPACF_KM_PAES_128;
+		break;
+	case PKEY_KEYTYPE_AES_192:
+		fc = CPACF_KM_PAES_192;
+		break;
+	case PKEY_KEYTYPE_AES_256:
+		fc = CPACF_KM_PAES_256;
+		break;
+	default:
+		fc = 0;
+		break;
+	}
 	ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
 
-	return ctx->fc ? 0 : -EINVAL;
+	rc = ctx->fc ? 0 : -EINVAL;	/* also fail if CPACF lacks this fc */
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
-			    unsigned int key_len)
+static int ecb_paes_do_crypt(struct s390_paes_ctx *ctx,
+			     struct s390_pecb_req_ctx *req_ctx,
+			     bool maysleep)
 {
-	int rc;
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-
-	_free_kb_keybuf(&ctx->kb);
-	rc = _key_to_kb(&ctx->kb, in_key, key_len);
+	struct ecb_param *param = &req_ctx->param;
+	struct skcipher_walk *walk = &req_ctx->walk;
+	unsigned int nbytes, n, k;
+	int pk_state, rc = 0;
+
+	if (!req_ctx->param_init_done) {
+		/* fetch and check protected key state */
+		spin_lock_bh(&ctx->pk_lock);
+		pk_state = ctx->pk_state;
+		switch (pk_state) {
+		case PK_STATE_NO_KEY:
+			rc = -ENOKEY;
+			break;
+		case PK_STATE_CONVERT_IN_PROGRESS:
+			rc = -EKEYEXPIRED;
+			break;
+		case PK_STATE_VALID:
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+			req_ctx->param_init_done = true;
+			break;
+		default:
+			rc = pk_state < 0 ? pk_state : -EIO;
+			break;
+		}
+		spin_unlock_bh(&ctx->pk_lock);
+	}
 	if (rc)
-		return rc;
+		goto out;
+
+	/*
+	 * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow up task may reuse the walk
+	 * or in case of unrecoverable failure needs to unmap it.
+	 */
+	while ((nbytes = walk->nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		k = cpacf_km(ctx->fc | req_ctx->modifier, param,
+			     walk->dst.virt.addr, walk->src.virt.addr, n);
+		if (k)
+			rc = skcipher_walk_done(walk, nbytes - k);
+		if (k < n) {
+			if (!maysleep) {
+				rc = -EKEYEXPIRED;
+				goto out;
+			}
+			rc = paes_convert_key(ctx);
+			if (rc)
+				goto out;
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
 
-	return __ecb_paes_set_key(ctx);
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
 static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
+	struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	unsigned int nbytes, n, k;
-	int ret;
-	struct {
-		u8 key[MAXPROTKEYSIZE];
-	} param;
+	struct skcipher_walk *walk = &req_ctx->walk;
+	int rc;
 
-	ret = skcipher_walk_virt(&walk, req, false);
-	if (ret)
-		return ret;
+	/*
+	 * Attempt synchronous encryption first. If it fails, schedule the request
+	 * asynchronously via the crypto engine. To preserve execution order,
+	 * once a request is queued to the engine, further requests using the same
+	 * tfm will also be routed through the engine.
+	 */
 
-	spin_lock_bh(&ctx->pk_lock);
-	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-	spin_unlock_bh(&ctx->pk_lock);
+	rc = skcipher_walk_virt(walk, req, false);
+	if (rc)
+		goto out;
 
-	while ((nbytes = walk.nbytes) != 0) {
-		/* only use complete blocks */
-		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		k = cpacf_km(ctx->fc | modifier, &param,
-			     walk.dst.virt.addr, walk.src.virt.addr, n);
-		if (k)
-			ret = skcipher_walk_done(&walk, nbytes - k);
-		if (k < n) {
-			if (__paes_convert_key(ctx))
-				return skcipher_walk_done(&walk, -EIO);
-			spin_lock_bh(&ctx->pk_lock);
-			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-			spin_unlock_bh(&ctx->pk_lock);
-		}
+	req_ctx->modifier = modifier;
+	req_ctx->param_init_done = false;
+
+	/* Try synchronous operation if no active engine usage */
+	if (!atomic_read(&ctx->via_engine_ctr)) {
+		rc = ecb_paes_do_crypt(ctx, req_ctx, false);
+		if (rc == 0)
+			goto out;
 	}
-	return ret;
+
+	/*
+	 * If sync operation failed or key expired or there are already
+	 * requests enqueued via engine, fallback to async. Mark tfm as
+	 * using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		atomic_inc(&ctx->via_engine_ctr);
+		rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&ctx->via_engine_ctr);
+	}
+
+	if (rc != -EINPROGRESS)
+		skcipher_walk_done(walk, rc);
+
+out:
+	if (rc != -EINPROGRESS)
+		memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
 static int ecb_paes_encrypt(struct skcipher_request *req)
@@ -254,113 +506,257 @@ static int ecb_paes_decrypt(struct skcipher_request *req)
 	return ecb_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct skcipher_alg ecb_paes_alg = {
-	.base.cra_name		=	"ecb(paes)",
-	.base.cra_driver_name	=	"ecb-paes-s390",
-	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
-	.base.cra_blocksize	=	AES_BLOCK_SIZE,
-	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
-	.base.cra_module	=	THIS_MODULE,
-	.base.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
-	.init			=	ecb_paes_init,
-	.exit			=	ecb_paes_exit,
-	.min_keysize		=	PAES_MIN_KEYSIZE,
-	.max_keysize		=	PAES_MAX_KEYSIZE,
-	.setkey			=	ecb_paes_set_key,
-	.encrypt		=	ecb_paes_encrypt,
-	.decrypt		=	ecb_paes_decrypt,
-};
-
-static int cbc_paes_init(struct crypto_skcipher *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
 {
 	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	ctx->kb.key = NULL;
+	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->pk_lock);
 
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pecb_req_ctx));
+
 	return 0;
 }
 
-static void cbc_paes_exit(struct crypto_skcipher *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
 {
 	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	_free_kb_keybuf(&ctx->kb);
+	memzero_explicit(ctx, sizeof(*ctx));
 }
 
-static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+static int ecb_paes_do_one_request(struct crypto_engine *engine, void *areq)
 {
+	struct skcipher_request *req = skcipher_request_cast(areq);
+	struct s390_pecb_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
 	int rc;
-	unsigned long fc;
 
-	rc = __paes_convert_key(ctx);
-	if (rc)
-		return rc;
+	/* walk has already been prepared by ecb_paes_crypt() */
+
+	rc = ecb_paes_do_crypt(ctx, req_ctx, true);
+	if (rc == -EKEYEXPIRED) {
+		/*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid an immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+		 */
+		cond_resched();
+		pr_debug("rescheduling request\n");
+		return -ENOSPC;
+	} else if (rc) {
+		skcipher_walk_done(walk, rc);
+	}
 
-	/* Pick the correct function code based on the protected key type */
-	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMC_PAES_128 :
-		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMC_PAES_192 :
-		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KMC_PAES_256 : 0;
+	memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("request complete with rc=%d\n", rc);
+	local_bh_disable();
+	atomic_dec(&ctx->via_engine_ctr);
+	crypto_finalize_skcipher_request(engine, req, rc);
+	local_bh_enable();
+	return rc;
+}
 
-	/* Check if the function code is available */
-	ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+static struct skcipher_engine_alg ecb_paes_alg = {
+	.base = {
+		.base.cra_name	      = "ecb(paes)",
+		.base.cra_driver_name = "ecb-paes-s390",
+		.base.cra_priority    = 401,	/* combo: aes + ecb + 1 */
+		.base.cra_blocksize   = AES_BLOCK_SIZE,
+		.base.cra_ctxsize     = sizeof(struct s390_paes_ctx),
+		.base.cra_module      = THIS_MODULE,
+		.base.cra_list	      = LIST_HEAD_INIT(ecb_paes_alg.base.base.cra_list),
+		.init		      = ecb_paes_init,
+		.exit		      = ecb_paes_exit,
+		.min_keysize	      = PAES_MIN_KEYSIZE,
+		.max_keysize	      = PAES_MAX_KEYSIZE,
+		.setkey		      = ecb_paes_setkey,
+		.encrypt	      = ecb_paes_encrypt,
+		.decrypt	      = ecb_paes_decrypt,
+	},
+	.op = {
+		.do_one_request	      = ecb_paes_do_one_request,
+	},
+};
 
-	return ctx->fc ? 0 : -EINVAL;
-}
+/*
+ * PAES CBC implementation
+ */
+
+struct cbc_param {
+	u8 iv[AES_BLOCK_SIZE];
+	u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pcbc_req_ctx {
+	unsigned long modifier;
+	struct skcipher_walk walk;
+	bool param_init_done;
+	struct cbc_param param;
+};
 
-static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
-			    unsigned int key_len)
+static int cbc_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
 {
-	int rc;
 	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	long fc;
+	int rc;
 
-	_free_kb_keybuf(&ctx->kb);
-	rc = _key_to_kb(&ctx->kb, in_key, key_len);
+	/* set raw key into context */
+	rc = paes_ctx_setkey(ctx, in_key, key_len);
 	if (rc)
-		return rc;
+		goto out;
+
+	/* convert raw key into protected key */
+	rc = paes_convert_key(ctx);
+	if (rc)
+		goto out;
+
+	/* Pick the correct function code based on the protected key type */
+	switch (ctx->pk.type) {
+	case PKEY_KEYTYPE_AES_128:
+		fc = CPACF_KMC_PAES_128;
+		break;
+	case PKEY_KEYTYPE_AES_192:
+		fc = CPACF_KMC_PAES_192;
+		break;
+	case PKEY_KEYTYPE_AES_256:
+		fc = CPACF_KMC_PAES_256;
+		break;
+	default:
+		fc = 0;
+		break;
+	}
+	ctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
 
-	return __cbc_paes_set_key(ctx);
+	rc = ctx->fc ? 0 : -EINVAL;	/* also fail if CPACF lacks this fc */
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int cbc_paes_do_crypt(struct s390_paes_ctx *ctx,
+			     struct s390_pcbc_req_ctx *req_ctx,
+			     bool maysleep)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
+	struct cbc_param *param = &req_ctx->param;
+	struct skcipher_walk *walk = &req_ctx->walk;
 	unsigned int nbytes, n, k;
-	int ret;
-	struct {
-		u8 iv[AES_BLOCK_SIZE];
-		u8 key[MAXPROTKEYSIZE];
-	} param;
-
-	ret = skcipher_walk_virt(&walk, req, false);
-	if (ret)
-		return ret;
+	int pk_state, rc = 0;
+
+	if (!req_ctx->param_init_done) {
+		/* fetch and check protected key state */
+		spin_lock_bh(&ctx->pk_lock);
+		pk_state = ctx->pk_state;
+		switch (pk_state) {
+		case PK_STATE_NO_KEY:
+			rc = -ENOKEY;
+			break;
+		case PK_STATE_CONVERT_IN_PROGRESS:
+			rc = -EKEYEXPIRED;
+			break;
+		case PK_STATE_VALID:
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+			req_ctx->param_init_done = true;
+			break;
+		default:
+			rc = pk_state < 0 ? pk_state : -EIO;
+			break;
+		}
+		spin_unlock_bh(&ctx->pk_lock);
+	}
+	if (rc)
+		goto out;
 
-	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
-	spin_lock_bh(&ctx->pk_lock);
-	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-	spin_unlock_bh(&ctx->pk_lock);
+	memcpy(param->iv, walk->iv, AES_BLOCK_SIZE);
 
-	while ((nbytes = walk.nbytes) != 0) {
+	/*
+	 * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow up task may reuse the walk
+	 * or in case of unrecoverable failure needs to unmap it.
+	 */
+	while ((nbytes = walk->nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		k = cpacf_kmc(ctx->fc | modifier, &param,
-			      walk.dst.virt.addr, walk.src.virt.addr, n);
+		k = cpacf_kmc(ctx->fc | req_ctx->modifier, param,
+			      walk->dst.virt.addr, walk->src.virt.addr, n);
 		if (k) {
-			memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
-			ret = skcipher_walk_done(&walk, nbytes - k);
+			memcpy(walk->iv, param->iv, AES_BLOCK_SIZE);
+			rc = skcipher_walk_done(walk, nbytes - k);
 		}
 		if (k < n) {
-			if (__paes_convert_key(ctx))
-				return skcipher_walk_done(&walk, -EIO);
+			if (!maysleep) {
+				rc = -EKEYEXPIRED;
+				goto out;
+			}
+			rc = paes_convert_key(ctx);
+			if (rc)
+				goto out;
 			spin_lock_bh(&ctx->pk_lock);
-			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
 			spin_unlock_bh(&ctx->pk_lock);
 		}
 	}
-	return ret;
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+{
+	struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
+	int rc;
+
+	/*
+	 * Attempt synchronous encryption first. If it fails, schedule the request
+	 * asynchronously via the crypto engine. To preserve execution order,
+	 * once a request is queued to the engine, further requests using the same
+	 * tfm will also be routed through the engine.
+	 */
+
+	rc = skcipher_walk_virt(walk, req, false);
+	if (rc)
+		goto out;
+
+	req_ctx->modifier = modifier;
+	req_ctx->param_init_done = false;
+
+	/* Try synchronous operation if no active engine usage */
+	if (!atomic_read(&ctx->via_engine_ctr)) {
+		rc = cbc_paes_do_crypt(ctx, req_ctx, false);
+		if (rc == 0)
+			goto out;
+	}
+
+	/*
+	 * If sync operation failed or key expired or there are already
+	 * requests enqueued via engine, fallback to async. Mark tfm as
+	 * using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		atomic_inc(&ctx->via_engine_ctr);
+		rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&ctx->via_engine_ctr);
+	}
+
+	if (rc != -EINPROGRESS)
+		skcipher_walk_done(walk, rc);
+
+out:
+	if (rc != -EINPROGRESS)
+		memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
 static int cbc_paes_encrypt(struct skcipher_request *req)
@@ -373,378 +769,881 @@ static int cbc_paes_decrypt(struct skcipher_request *req)
 	return cbc_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct skcipher_alg cbc_paes_alg = {
-	.base.cra_name		=	"cbc(paes)",
-	.base.cra_driver_name	=	"cbc-paes-s390",
-	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
-	.base.cra_blocksize	=	AES_BLOCK_SIZE,
-	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
-	.base.cra_module	=	THIS_MODULE,
-	.base.cra_list		=	LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
-	.init			=	cbc_paes_init,
-	.exit			=	cbc_paes_exit,
-	.min_keysize		=	PAES_MIN_KEYSIZE,
-	.max_keysize		=	PAES_MAX_KEYSIZE,
-	.ivsize			=	AES_BLOCK_SIZE,
-	.setkey			=	cbc_paes_set_key,
-	.encrypt		=	cbc_paes_encrypt,
-	.decrypt		=	cbc_paes_decrypt,
-};
-
-static int xts_paes_init(struct crypto_skcipher *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	ctx->kb[0].key = NULL;
-	ctx->kb[1].key = NULL;
+	memset(ctx, 0, sizeof(*ctx));
 	spin_lock_init(&ctx->pk_lock);
 
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pcbc_req_ctx));
+
 	return 0;
 }
 
-static void xts_paes_exit(struct crypto_skcipher *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	_free_kb_keybuf(&ctx->kb[0]);
-	_free_kb_keybuf(&ctx->kb[1]);
+	memzero_explicit(ctx, sizeof(*ctx));
 }
 
-static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+static int cbc_paes_do_one_request(struct crypto_engine *engine, void *areq)
 {
-	struct pkey_protkey pkey0, pkey1;
-
-	if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
-	    __paes_keyblob2pkey(&ctx->kb[1], &pkey1))
-		return -EINVAL;
+	struct skcipher_request *req = skcipher_request_cast(areq);
+	struct s390_pcbc_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
+	int rc;
 
-	spin_lock_bh(&ctx->pk_lock);
-	memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0));
-	memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1));
-	spin_unlock_bh(&ctx->pk_lock);
+	/* walk has already been prepared by cbc_paes_crypt() */
+
+	rc = cbc_paes_do_crypt(ctx, req_ctx, true);
+	if (rc == -EKEYEXPIRED) {
+		/*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid an immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+		 */
+		cond_resched();
+		pr_debug("rescheduling request\n");
+		return -ENOSPC;
+	} else if (rc) {
+		skcipher_walk_done(walk, rc);
+	}
 
-	return 0;
+	memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("request complete with rc=%d\n", rc);
+	local_bh_disable();
+	atomic_dec(&ctx->via_engine_ctr);
+	crypto_finalize_skcipher_request(engine, req, rc);
+	local_bh_enable();
+	return rc;
 }
 
-static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+static struct skcipher_engine_alg cbc_paes_alg = {
+	.base = {
+		.base.cra_name	      = "cbc(paes)",
+		.base.cra_driver_name = "cbc-paes-s390",
+		.base.cra_priority    = 402,	/* ecb-paes-s390 + 1 */
+		.base.cra_blocksize   = AES_BLOCK_SIZE,
+		.base.cra_ctxsize     = sizeof(struct s390_paes_ctx),
+		.base.cra_module      = THIS_MODULE,
+		.base.cra_list	      = LIST_HEAD_INIT(cbc_paes_alg.base.base.cra_list),
+		.init		      = cbc_paes_init,
+		.exit		      = cbc_paes_exit,
+		.min_keysize	      = PAES_MIN_KEYSIZE,
+		.max_keysize	      = PAES_MAX_KEYSIZE,
+		.ivsize		      = AES_BLOCK_SIZE,
+		.setkey		      = cbc_paes_setkey,
+		.encrypt	      = cbc_paes_encrypt,
+		.decrypt	      = cbc_paes_decrypt,
+	},
+	.op = {
+		.do_one_request	      = cbc_paes_do_one_request,
+	},
+};
+
+/*
+ * PAES CTR implementation
+ */
+
+struct ctr_param {
+	u8 key[PAES_256_PROTKEY_SIZE];
+} __packed;
+
+struct s390_pctr_req_ctx {
+	unsigned long modifier;
+	struct skcipher_walk walk;
+	bool param_init_done;
+	struct ctr_param param;
+};
+
+static int ctr_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int key_len)
 {
-	unsigned long fc;
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	long fc;
+	int rc;
 
-	if (__xts_paes_convert_key(ctx))
-		return -EINVAL;
+	/* set raw key into context */
+	rc = paes_ctx_setkey(ctx, in_key, key_len);
+	if (rc)
+		goto out;
 
-	if (ctx->pk[0].type != ctx->pk[1].type)
-		return -EINVAL;
+	/* convert raw key into protected key */
+	rc = paes_convert_key(ctx);
+	if (rc)
+		goto out;
 
 	/* Pick the correct function code based on the protected key type */
-	fc = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PXTS_128 :
-		(ctx->pk[0].type == PKEY_KEYTYPE_AES_256) ?
-		CPACF_KM_PXTS_256 : 0;
+	switch (ctx->pk.type) {
+	case PKEY_KEYTYPE_AES_128:
+		fc = CPACF_KMCTR_PAES_128;
+		break;
+	case PKEY_KEYTYPE_AES_192:
+		fc = CPACF_KMCTR_PAES_192;
+		break;
+	case PKEY_KEYTYPE_AES_256:
+		fc = CPACF_KMCTR_PAES_256;
+		break;
+	default:
+		fc = 0;
+		break;
+	}
+	ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
 
-	/* Check if the function code is available */
-	ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	rc = ctx->fc ? 0 : -EINVAL;	/* also fail if CPACF lacks this fc */
 
-	return ctx->fc ? 0 : -EINVAL;
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
-			    unsigned int xts_key_len)
+static inline unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
+	unsigned int i, n;
+
+	/* only use complete blocks, max. PAGE_SIZE */
+	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
+	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
+	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		ctrptr += AES_BLOCK_SIZE;
+	}
+	return n;
+}
+
+static int ctr_paes_do_crypt(struct s390_paes_ctx *ctx,
+			     struct s390_pctr_req_ctx *req_ctx,
+			     bool maysleep)
+{
+	struct ctr_param *param = &req_ctx->param;
+	struct skcipher_walk *walk = &req_ctx->walk;
+	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	unsigned int nbytes, n, k;
+	int pk_state, locked, rc = 0;
+
+	if (!req_ctx->param_init_done) {
+		/* fetch and check protected key state */
+		spin_lock_bh(&ctx->pk_lock);
+		pk_state = ctx->pk_state;
+		switch (pk_state) {
+		case PK_STATE_NO_KEY:
+			rc = -ENOKEY;
+			break;
+		case PK_STATE_CONVERT_IN_PROGRESS:
+			rc = -EKEYEXPIRED;
+			break;
+		case PK_STATE_VALID:
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+			req_ctx->param_init_done = true;
+			break;
+		default:
+			rc = pk_state < 0 ? pk_state : -EIO;
+			break;
+		}
+		spin_unlock_bh(&ctx->pk_lock);
+	}
+	if (rc)
+		goto out;
+
+	locked = mutex_trylock(&ctrblk_lock);
+
+	/*
+	 * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow up task may reuse the walk
+	 * or in case of unrecoverable failure needs to unmap it.
+	 */
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+		n = AES_BLOCK_SIZE;
+		if (nbytes >= 2 * AES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		k = cpacf_kmctr(ctx->fc, param, walk->dst.virt.addr,
+				walk->src.virt.addr, n, ctrptr);
+		if (k) {
+			if (ctrptr == ctrblk)
+				memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+			crypto_inc(walk->iv, AES_BLOCK_SIZE);
+			rc = skcipher_walk_done(walk, nbytes - k);
+		}
+		if (k < n) {
+			if (!maysleep) {
+				if (locked)
+					mutex_unlock(&ctrblk_lock);
+				rc = -EKEYEXPIRED;
+				goto out;
+			}
+			rc = paes_convert_key(ctx);
+			if (rc) {
+				if (locked)
+					mutex_unlock(&ctrblk_lock);
+				goto out;
+			}
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
+	if (locked)
+		mutex_unlock(&ctrblk_lock);
+
+	/* final block may be < AES_BLOCK_SIZE, copy only nbytes */
+	if (nbytes) {
+		memset(buf, 0, AES_BLOCK_SIZE);
+		memcpy(buf, walk->src.virt.addr, nbytes);
+		while (1) {
+			if (cpacf_kmctr(ctx->fc, param, buf,
+					buf, AES_BLOCK_SIZE,
+					walk->iv) == AES_BLOCK_SIZE)
+				break;
+			if (!maysleep) {
+				rc = -EKEYEXPIRED;
+				goto out;
+			}
+			rc = paes_convert_key(ctx);
+			if (rc)
+				goto out;
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param->key, ctx->pk.protkey, sizeof(param->key));
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
+		rc = skcipher_walk_done(walk, 0);
+	}
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int ctr_paes_crypt(struct skcipher_request *req)
+{
+	struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
 	int rc;
+
+	/*
+	 * Attempt synchronous encryption first. If it fails, schedule the request
+	 * asynchronously via the crypto engine. To preserve execution order,
+	 * once a request is queued to the engine, further requests using the same
+	 * tfm will also be routed through the engine.
+	 */
+
+	rc = skcipher_walk_virt(walk, req, false);
+	if (rc)
+		goto out;
+
+	req_ctx->param_init_done = false;
+
+	/* Try synchronous operation if no active engine usage */
+	if (!atomic_read(&ctx->via_engine_ctr)) {
+		rc = ctr_paes_do_crypt(ctx, req_ctx, false);
+		if (rc == 0)
+			goto out;
+	}
+
+	/*
+	 * If sync operation failed or key expired or there are already
+	 * requests enqueued via engine, fall back to async. Mark tfm as
+	 * using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		atomic_inc(&ctx->via_engine_ctr);
+		rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&ctx->via_engine_ctr);
+	}
+
+	if (rc != -EINPROGRESS)
+		skcipher_walk_done(walk, rc);
+
+out:
+	if (rc != -EINPROGRESS)
+		memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("rc=%d\n", rc);
+	return rc;
+}
+
+static int ctr_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->pk_lock);
+
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pctr_req_ctx));
+
+	return 0;
+}
+
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memzero_explicit(ctx, sizeof(*ctx));
+}
+
+static int ctr_paes_do_one_request(struct crypto_engine *engine, void *areq)
+{
+	struct skcipher_request *req = skcipher_request_cast(areq);
+	struct s390_pctr_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
+	int rc;
+
+	/* walk has already been prepared */
+
+	rc = ctr_paes_do_crypt(ctx, req_ctx, true);
+	if (rc == -EKEYEXPIRED) {
+		/*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+		 */
+		cond_resched();
+		pr_debug("rescheduling request\n");
+		return -ENOSPC;
+	} else if (rc) {
+		skcipher_walk_done(walk, rc);
+	}
+
+	memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("request complete with rc=%d\n", rc);
+	local_bh_disable();
+	atomic_dec(&ctx->via_engine_ctr);
+	crypto_finalize_skcipher_request(engine, req, rc);
+	local_bh_enable();
+	return rc;
+}
+
+static struct skcipher_engine_alg ctr_paes_alg = {
+	.base = {
+		.base.cra_name	      =	"ctr(paes)",
+		.base.cra_driver_name =	"ctr-paes-s390",
+		.base.cra_priority    =	402,	/* ecb-paes-s390 + 1 */
+		.base.cra_blocksize   =	1,
+		.base.cra_ctxsize     =	sizeof(struct s390_paes_ctx),
+		.base.cra_module      =	THIS_MODULE,
+		.base.cra_list	      =	LIST_HEAD_INIT(ctr_paes_alg.base.base.cra_list),
+		.init		      =	ctr_paes_init,
+		.exit		      =	ctr_paes_exit,
+		.min_keysize	      =	PAES_MIN_KEYSIZE,
+		.max_keysize	      =	PAES_MAX_KEYSIZE,
+		.ivsize		      =	AES_BLOCK_SIZE,
+		.setkey		      =	ctr_paes_setkey,
+		.encrypt	      =	ctr_paes_crypt,
+		.decrypt	      =	ctr_paes_crypt,
+		.chunksize	      =	AES_BLOCK_SIZE,
+	},
+	.op = {
+		.do_one_request	      = ctr_paes_do_one_request,
+	},
+};
+
+/*
+ * PAES XTS implementation
+ */
+
+struct xts_full_km_param {
+	u8 key[64];
+	u8 tweak[16];
+	u8 nap[16];
+	u8 wkvp[32];
+} __packed;
+
+struct xts_km_param {
+	u8 key[PAES_256_PROTKEY_SIZE];
+	u8 init[16];
+} __packed;
+
+struct xts_pcc_param {
+	u8 key[PAES_256_PROTKEY_SIZE];
+	u8 tweak[16];
+	u8 block[16];
+	u8 bit[16];
+	u8 xts[16];
+} __packed;
+
+struct s390_pxts_req_ctx {
+	unsigned long modifier;
+	struct skcipher_walk walk;
+	bool param_init_done;
+	union {
+		struct xts_full_km_param full_km_param;
+		struct xts_km_param km_param;
+	} param;
+};
+
+static int xts_paes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			   unsigned int in_keylen)
+{
 	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 ckey[2 * AES_MAX_KEY_SIZE];
-	unsigned int ckey_len, key_len;
+	unsigned int ckey_len;
+	long fc;
+	int rc;
 
-	if (xts_key_len % 2)
+	if ((in_keylen == 32 || in_keylen == 64) &&
+	    xts_verify_key(tfm, in_key, in_keylen))
 		return -EINVAL;
 
-	key_len = xts_key_len / 2;
-
-	_free_kb_keybuf(&ctx->kb[0]);
-	_free_kb_keybuf(&ctx->kb[1]);
-	rc = _key_to_kb(&ctx->kb[0], in_key, key_len);
+	/* set raw key into context */
+	rc = pxts_ctx_setkey(ctx, in_key, in_keylen);
 	if (rc)
-		return rc;
-	rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
-	if (rc)
-		return rc;
+		goto out;
 
-	rc = __xts_paes_set_key(ctx);
+	/* convert raw key(s) into protected key(s) */
+	rc = pxts_convert_key(ctx);
 	if (rc)
-		return rc;
+		goto out;
 
 	/*
-	 * xts_check_key verifies the key length is not odd and makes
+	 * xts_verify_key verifies the key length is not odd and makes
 	 * sure that the two keys are not the same. This can be done
-	 * on the two protected keys as well
+	 * on the two protected keys as well - but not for full xts keys.
 	 */
-	ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
-		AES_KEYSIZE_128 : AES_KEYSIZE_256;
-	memcpy(ckey, ctx->pk[0].protkey, ckey_len);
-	memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
-	return xts_verify_key(tfm, ckey, 2*ckey_len);
+	if (ctx->pk[0].type == PKEY_KEYTYPE_AES_128 ||
+	    ctx->pk[0].type == PKEY_KEYTYPE_AES_256) {
+		ckey_len = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ?
+			AES_KEYSIZE_128 : AES_KEYSIZE_256;
+		memcpy(ckey, ctx->pk[0].protkey, ckey_len);
+		memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
+		rc = xts_verify_key(tfm, ckey, 2 * ckey_len);
+		memzero_explicit(ckey, sizeof(ckey));
+		if (rc)
+			goto out;
+	}
+
+	/* Pick the correct function code based on the protected key type */
+	switch (ctx->pk[0].type) {
+	case PKEY_KEYTYPE_AES_128:
+		fc = CPACF_KM_PXTS_128;
+		break;
+	case PKEY_KEYTYPE_AES_256:
+		fc = CPACF_KM_PXTS_256;
+		break;
+	case PKEY_KEYTYPE_AES_XTS_128:
+		fc = CPACF_KM_PXTS_128_FULL;
+		break;
+	case PKEY_KEYTYPE_AES_XTS_256:
+		fc = CPACF_KM_PXTS_256_FULL;
+		break;
+	default:
+		fc = 0;
+		break;
+	}
+	ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	/* fail setkey if the CPACF function is unknown or not available */
+	rc = ctx->fc ? 0 : -EINVAL;
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
+static int xts_paes_do_crypt_fullkey(struct s390_pxts_ctx *ctx,
+				     struct s390_pxts_req_ctx *req_ctx,
+				     bool maysleep)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
+	struct xts_full_km_param *param = &req_ctx->param.full_km_param;
+	struct skcipher_walk *walk = &req_ctx->walk;
 	unsigned int keylen, offset, nbytes, n, k;
-	int ret;
-	struct {
-		u8 key[MAXPROTKEYSIZE];	/* key + verification pattern */
-		u8 tweak[16];
-		u8 block[16];
-		u8 bit[16];
-		u8 xts[16];
-	} pcc_param;
-	struct {
-		u8 key[MAXPROTKEYSIZE];	/* key + verification pattern */
-		u8 init[16];
-	} xts_param;
-
-	ret = skcipher_walk_virt(&walk, req, false);
-	if (ret)
-		return ret;
+	int rc = 0;
 
-	keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
-	offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
+	/*
+	 * The calling function xts_paes_do_crypt() ensures the
+	 * protected key state is always PK_STATE_VALID when this
+	 * function is invoked.
+	 */
 
-	memset(&pcc_param, 0, sizeof(pcc_param));
-	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
-	spin_lock_bh(&ctx->pk_lock);
-	memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
-	memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
-	spin_unlock_bh(&ctx->pk_lock);
-	cpacf_pcc(ctx->fc, pcc_param.key + offset);
-	memcpy(xts_param.init, pcc_param.xts, 16);
+	keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 64;
+	offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_XTS_128) ? 32 : 0;
+
+	if (!req_ctx->param_init_done) {
+		memset(param, 0, sizeof(*param));
+		spin_lock_bh(&ctx->pk_lock);
+		memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+		memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
+		spin_unlock_bh(&ctx->pk_lock);
+		memcpy(param->tweak, walk->iv, sizeof(param->tweak));
+		param->nap[0] = 0x01; /* initial alpha power (1, little-endian) */
+		req_ctx->param_init_done = true;
+	}
 
-	while ((nbytes = walk.nbytes) != 0) {
+	/*
+	 * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow up task may reuse the walk
+	 * or in case of unrecoverable failure needs to unmap it.
+	 */
+	while ((nbytes = walk->nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
-			     walk.dst.virt.addr, walk.src.virt.addr, n);
+		k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+			     walk->dst.virt.addr, walk->src.virt.addr, n);
 		if (k)
-			ret = skcipher_walk_done(&walk, nbytes - k);
+			rc = skcipher_walk_done(walk, nbytes - k);
 		if (k < n) {
-			if (__xts_paes_convert_key(ctx))
-				return skcipher_walk_done(&walk, -EIO);
+			if (!maysleep) {
+				rc = -EKEYEXPIRED;
+				goto out;
+			}
+			rc = pxts_convert_key(ctx);
+			if (rc)
+				goto out;
 			spin_lock_bh(&ctx->pk_lock);
-			memcpy(xts_param.key + offset,
-			       ctx->pk[0].protkey, keylen);
+			memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+			memcpy(param->wkvp, ctx->pk[0].protkey + keylen, sizeof(param->wkvp));
 			spin_unlock_bh(&ctx->pk_lock);
 		}
 	}
 
-	return ret;
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int xts_paes_encrypt(struct skcipher_request *req)
+static inline int __xts_2keys_prep_param(struct s390_pxts_ctx *ctx,
+					 struct xts_km_param *param,
+					 struct skcipher_walk *walk,
+					 unsigned int keylen,
+					 unsigned int offset, bool maysleep)
 {
-	return xts_paes_crypt(req, 0);
+	struct xts_pcc_param pcc_param;
+	unsigned long cc = 1;
+	int rc = 0;
+
+	while (cc) {
+		memset(&pcc_param, 0, sizeof(pcc_param));
+		memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+		spin_lock_bh(&ctx->pk_lock);
+		memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
+		memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+		spin_unlock_bh(&ctx->pk_lock);
+		cc = cpacf_pcc(ctx->fc, pcc_param.key + offset);
+		if (cc) {
+			if (!maysleep) {
+				rc = -EKEYEXPIRED;
+				break;
+			}
+			rc = pxts_convert_key(ctx);
+			if (rc)
+				break;
+			continue;
+		}
+		memcpy(param->init, pcc_param.xts, 16);
+	}
+
+	memzero_explicit(pcc_param.key, sizeof(pcc_param.key));
+	return rc;
 }
 
-static int xts_paes_decrypt(struct skcipher_request *req)
+static int xts_paes_do_crypt_2keys(struct s390_pxts_ctx *ctx,
+				   struct s390_pxts_req_ctx *req_ctx,
+				   bool maysleep)
 {
-	return xts_paes_crypt(req, CPACF_DECRYPT);
-}
+	struct xts_km_param *param = &req_ctx->param.km_param;
+	struct skcipher_walk *walk = &req_ctx->walk;
+	unsigned int keylen, offset, nbytes, n, k;
+	int rc = 0;
 
-static struct skcipher_alg xts_paes_alg = {
-	.base.cra_name		=	"xts(paes)",
-	.base.cra_driver_name	=	"xts-paes-s390",
-	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
-	.base.cra_blocksize	=	AES_BLOCK_SIZE,
-	.base.cra_ctxsize	=	sizeof(struct s390_pxts_ctx),
-	.base.cra_module	=	THIS_MODULE,
-	.base.cra_list		=	LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
-	.init			=	xts_paes_init,
-	.exit			=	xts_paes_exit,
-	.min_keysize		=	2 * PAES_MIN_KEYSIZE,
-	.max_keysize		=	2 * PAES_MAX_KEYSIZE,
-	.ivsize			=	AES_BLOCK_SIZE,
-	.setkey			=	xts_paes_set_key,
-	.encrypt		=	xts_paes_encrypt,
-	.decrypt		=	xts_paes_decrypt,
-};
+	/*
+	 * The calling function xts_paes_do_crypt() ensures the
+	 * protected key state is always PK_STATE_VALID when this
+	 * function is invoked.
+	 */
 
-static int ctr_paes_init(struct crypto_skcipher *tfm)
-{
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
+	offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
 
-	ctx->kb.key = NULL;
-	spin_lock_init(&ctx->pk_lock);
+	if (!req_ctx->param_init_done) {
+		rc = __xts_2keys_prep_param(ctx, param, walk,
+					    keylen, offset, maysleep);
+		if (rc)
+			goto out;
+		req_ctx->param_init_done = true;
+	}
 
-	return 0;
+	/*
+	 * Note that in case of partial processing or failure the walk
+	 * is NOT unmapped here. So a follow up task may reuse the walk
+	 * or in case of unrecoverable failure needs to unmap it.
+	 */
+	while ((nbytes = walk->nbytes) != 0) {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		k = cpacf_km(ctx->fc | req_ctx->modifier, param->key + offset,
+			     walk->dst.virt.addr, walk->src.virt.addr, n);
+		if (k)
+			rc = skcipher_walk_done(walk, nbytes - k);
+		if (k < n) {
+			if (!maysleep) {
+				rc = -EKEYEXPIRED;
+				goto out;
+			}
+			rc = pxts_convert_key(ctx);
+			if (rc)
+				goto out;
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param->key + offset, ctx->pk[0].protkey, keylen);
+			spin_unlock_bh(&ctx->pk_lock);
+		}
+	}
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static void ctr_paes_exit(struct crypto_skcipher *tfm)
+static int xts_paes_do_crypt(struct s390_pxts_ctx *ctx,
+			     struct s390_pxts_req_ctx *req_ctx,
+			     bool maysleep)
 {
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int pk_state, rc = 0;
 
-	_free_kb_keybuf(&ctx->kb);
+	/* fetch and check protected key state */
+	spin_lock_bh(&ctx->pk_lock);
+	pk_state = ctx->pk_state;
+	switch (pk_state) {
+	case PK_STATE_NO_KEY:
+		rc = -ENOKEY;
+		break;
+	case PK_STATE_CONVERT_IN_PROGRESS:
+		rc = -EKEYEXPIRED;
+		break;
+	case PK_STATE_VALID:
+		break;
+	default:
+		rc = pk_state < 0 ? pk_state : -EIO;
+		break;
+	}
+	spin_unlock_bh(&ctx->pk_lock);
+	if (rc)
+		goto out;
+
+	/* Call the 'real' crypt function based on the xts prot key type. */
+	switch (ctx->fc) {
+	case CPACF_KM_PXTS_128:
+	case CPACF_KM_PXTS_256:
+		rc = xts_paes_do_crypt_2keys(ctx, req_ctx, maysleep);
+		break;
+	case CPACF_KM_PXTS_128_FULL:
+	case CPACF_KM_PXTS_256_FULL:
+		rc = xts_paes_do_crypt_fullkey(ctx, req_ctx, maysleep);
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+out:
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+static inline int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
+	struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
 	int rc;
-	unsigned long fc;
 
-	rc = __paes_convert_key(ctx);
+	/*
+	 * Attempt synchronous encryption first. If it fails, schedule the request
+	 * asynchronously via the crypto engine. To preserve execution order,
+	 * once a request is queued to the engine, further requests using the same
+	 * tfm will also be routed through the engine.
+	 */
+
+	rc = skcipher_walk_virt(walk, req, false);
 	if (rc)
-		return rc;
+		goto out;
 
-	/* Pick the correct function code based on the protected key type */
-	fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KMCTR_PAES_128 :
-		(ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KMCTR_PAES_192 :
-		(ctx->pk.type == PKEY_KEYTYPE_AES_256) ?
-		CPACF_KMCTR_PAES_256 : 0;
+	req_ctx->modifier = modifier;
+	req_ctx->param_init_done = false;
 
-	/* Check if the function code is available */
-	ctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+	/* Try synchronous operation if no active engine usage */
+	if (!atomic_read(&ctx->via_engine_ctr)) {
+		rc = xts_paes_do_crypt(ctx, req_ctx, false);
+		if (rc == 0)
+			goto out;
+	}
+
+	/*
+	 * If sync operation failed or key expired or there are already
+	 * requests enqueued via engine, fall back to async. Mark tfm as
+	 * using engine to serialize requests.
+	 */
+	if (rc == 0 || rc == -EKEYEXPIRED) {
+		atomic_inc(&ctx->via_engine_ctr);
+		rc = crypto_transfer_skcipher_request_to_engine(paes_crypto_engine, req);
+		if (rc != -EINPROGRESS)
+			atomic_dec(&ctx->via_engine_ctr);
+	}
 
-	return ctx->fc ? 0 : -EINVAL;
+	if (rc != -EINPROGRESS)
+		skcipher_walk_done(walk, rc);
+
+out:
+	if (rc != -EINPROGRESS)
+		memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("rc=%d\n", rc);
+	return rc;
 }
 
-static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
-			    unsigned int key_len)
+static int xts_paes_encrypt(struct skcipher_request *req)
 {
-	int rc;
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	return xts_paes_crypt(req, 0);
+}
 
-	_free_kb_keybuf(&ctx->kb);
-	rc = _key_to_kb(&ctx->kb, in_key, key_len);
-	if (rc)
-		return rc;
+static int xts_paes_decrypt(struct skcipher_request *req)
+{
+	return xts_paes_crypt(req, CPACF_DECRYPT);
+}
+
+static int xts_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+	spin_lock_init(&ctx->pk_lock);
 
-	return __ctr_paes_set_key(ctx);
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct s390_pxts_req_ctx));
+
+	return 0;
 }
 
-static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
 {
-	unsigned int i, n;
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	/* only use complete blocks, max. PAGE_SIZE */
-	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
-	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
-	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
-		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
-		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-		ctrptr += AES_BLOCK_SIZE;
-	}
-	return n;
+	memzero_explicit(ctx, sizeof(*ctx));
 }
 
-static int ctr_paes_crypt(struct skcipher_request *req)
+static int xts_paes_do_one_request(struct crypto_engine *engine, void *areq)
 {
+	struct skcipher_request *req = skcipher_request_cast(areq);
+	struct s390_pxts_req_ctx *req_ctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u8 buf[AES_BLOCK_SIZE], *ctrptr;
-	struct skcipher_walk walk;
-	unsigned int nbytes, n, k;
-	int ret, locked;
-	struct {
-		u8 key[MAXPROTKEYSIZE];
-	} param;
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk *walk = &req_ctx->walk;
+	int rc;
 
-	ret = skcipher_walk_virt(&walk, req, false);
-	if (ret)
-		return ret;
+	/* walk has already been prepared */
+
+	rc = xts_paes_do_crypt(ctx, req_ctx, true);
+	if (rc == -EKEYEXPIRED) {
+		/*
+		 * Protected key expired, conversion is in progress.
+		 * Trigger a re-schedule of this request by returning
+		 * -ENOSPC ("hardware queue is full") to the crypto engine.
+		 * To avoid immediate re-invocation of this callback,
+		 * tell the scheduler to voluntarily give up the CPU here.
+		 */
+		cond_resched();
+		pr_debug("rescheduling request\n");
+		return -ENOSPC;
+	} else if (rc) {
+		skcipher_walk_done(walk, rc);
+	}
 
-	spin_lock_bh(&ctx->pk_lock);
-	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-	spin_unlock_bh(&ctx->pk_lock);
+	memzero_explicit(&req_ctx->param, sizeof(req_ctx->param));
+	pr_debug("request complete with rc=%d\n", rc);
+	local_bh_disable();
+	atomic_dec(&ctx->via_engine_ctr);
+	crypto_finalize_skcipher_request(engine, req, rc);
+	local_bh_enable();
+	return rc;
+}
 
-	locked = mutex_trylock(&ctrblk_lock);
+static struct skcipher_engine_alg xts_paes_alg = {
+	.base = {
+		.base.cra_name	      =	"xts(paes)",
+		.base.cra_driver_name =	"xts-paes-s390",
+		.base.cra_priority    =	402,	/* ecb-paes-s390 + 1 */
+		.base.cra_blocksize   =	AES_BLOCK_SIZE,
+		.base.cra_ctxsize     =	sizeof(struct s390_pxts_ctx),
+		.base.cra_module      =	THIS_MODULE,
+		.base.cra_list	      =	LIST_HEAD_INIT(xts_paes_alg.base.base.cra_list),
+		.init		      =	xts_paes_init,
+		.exit		      =	xts_paes_exit,
+		.min_keysize	      =	2 * PAES_MIN_KEYSIZE,
+		.max_keysize	      =	2 * PAES_MAX_KEYSIZE,
+		.ivsize		      =	AES_BLOCK_SIZE,
+		.setkey		      =	xts_paes_setkey,
+		.encrypt	      =	xts_paes_encrypt,
+		.decrypt	      =	xts_paes_decrypt,
+	},
+	.op = {
+		.do_one_request	      = xts_paes_do_one_request,
+	},
+};
 
-	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
-		n = AES_BLOCK_SIZE;
-		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
-		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
-		k = cpacf_kmctr(ctx->fc, &param, walk.dst.virt.addr,
-				walk.src.virt.addr, n, ctrptr);
-		if (k) {
-			if (ctrptr == ctrblk)
-				memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
-				       AES_BLOCK_SIZE);
-			crypto_inc(walk.iv, AES_BLOCK_SIZE);
-			ret = skcipher_walk_done(&walk, nbytes - k);
-		}
-		if (k < n) {
-			if (__paes_convert_key(ctx)) {
-				if (locked)
-					mutex_unlock(&ctrblk_lock);
-				return skcipher_walk_done(&walk, -EIO);
-			}
-			spin_lock_bh(&ctx->pk_lock);
-			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-			spin_unlock_bh(&ctx->pk_lock);
-		}
-	}
-	if (locked)
-		mutex_unlock(&ctrblk_lock);
-	/*
-	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
-	 */
-	if (nbytes) {
-		while (1) {
-			if (cpacf_kmctr(ctx->fc, &param, buf,
-					walk.src.virt.addr, AES_BLOCK_SIZE,
-					walk.iv) == AES_BLOCK_SIZE)
-				break;
-			if (__paes_convert_key(ctx))
-				return skcipher_walk_done(&walk, -EIO);
-			spin_lock_bh(&ctx->pk_lock);
-			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-			spin_unlock_bh(&ctx->pk_lock);
-		}
-		memcpy(walk.dst.virt.addr, buf, nbytes);
-		crypto_inc(walk.iv, AES_BLOCK_SIZE);
-		ret = skcipher_walk_done(&walk, nbytes);
-	}
-
-	return ret;
-}
-
-static struct skcipher_alg ctr_paes_alg = {
-	.base.cra_name		=	"ctr(paes)",
-	.base.cra_driver_name	=	"ctr-paes-s390",
-	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
-	.base.cra_blocksize	=	1,
-	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
-	.base.cra_module	=	THIS_MODULE,
-	.base.cra_list		=	LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
-	.init			=	ctr_paes_init,
-	.exit			=	ctr_paes_exit,
-	.min_keysize		=	PAES_MIN_KEYSIZE,
-	.max_keysize		=	PAES_MAX_KEYSIZE,
-	.ivsize			=	AES_BLOCK_SIZE,
-	.setkey			=	ctr_paes_set_key,
-	.encrypt		=	ctr_paes_crypt,
-	.decrypt		=	ctr_paes_crypt,
-	.chunksize		=	AES_BLOCK_SIZE,
+/*
+ * alg register, unregister, module init, exit
+ */
+
+static struct miscdevice paes_dev = {
+	.name	= "paes",
+	.minor	= MISC_DYNAMIC_MINOR,
 };
 
-static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_engine_alg *alg)
 {
-	if (!list_empty(&alg->base.cra_list))
-		crypto_unregister_skcipher(alg);
+	if (!list_empty(&alg->base.base.cra_list))
+		crypto_engine_unregister_skcipher(alg);
 }
 
 static void paes_s390_fini(void)
 {
+	if (paes_crypto_engine) {
+		crypto_engine_stop(paes_crypto_engine);
+		crypto_engine_exit(paes_crypto_engine);
+	}
 	__crypto_unregister_skcipher(&ctr_paes_alg);
 	__crypto_unregister_skcipher(&xts_paes_alg);
 	__crypto_unregister_skcipher(&cbc_paes_alg);
 	__crypto_unregister_skcipher(&ecb_paes_alg);
 	if (ctrblk)
-		free_page((unsigned long) ctrblk);
+		free_page((unsigned long)ctrblk);
+	misc_deregister(&paes_dev);
 }
 
 static int __init paes_s390_init(void)
 {
-	int ret;
+	int rc;
+
+	/* register a simple paes pseudo misc device */
+	rc = misc_register(&paes_dev);
+	if (rc)
+		return rc;
+
+	/* with this pseudo device alloc and start a crypto engine */
+	paes_crypto_engine =
+		crypto_engine_alloc_init_and_set(paes_dev.this_device,
+						 true, NULL, false, MAX_QLEN);
+	if (!paes_crypto_engine) {
+		rc = -ENOMEM;
+		goto out_err;
+	}
+	rc = crypto_engine_start(paes_crypto_engine);
+	if (rc) {
+		crypto_engine_exit(paes_crypto_engine);
+		paes_crypto_engine = NULL;
+		goto out_err;
+	}
 
 	/* Query available functions for KM, KMC and KMCTR */
 	cpacf_query(CPACF_KM, &km_functions);
@@ -754,49 +1653,57 @@ static int __init paes_s390_init(void)
 	if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
-		ret = crypto_register_skcipher(&ecb_paes_alg);
-		if (ret)
+		rc = crypto_engine_register_skcipher(&ecb_paes_alg);
+		if (rc)
 			goto out_err;
+		pr_debug("%s registered\n", ecb_paes_alg.base.base.cra_driver_name);
 	}
 
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
-		ret = crypto_register_skcipher(&cbc_paes_alg);
-		if (ret)
+		rc = crypto_engine_register_skcipher(&cbc_paes_alg);
+		if (rc)
 			goto out_err;
+		pr_debug("%s registered\n", cbc_paes_alg.base.base.cra_driver_name);
 	}
 
 	if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
-		ret = crypto_register_skcipher(&xts_paes_alg);
-		if (ret)
+		rc = crypto_engine_register_skcipher(&xts_paes_alg);
+		if (rc)
 			goto out_err;
+		pr_debug("%s registered\n", xts_paes_alg.base.base.cra_driver_name);
 	}
 
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
 	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
 	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
-		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		ctrblk = (u8 *)__get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
-			ret = -ENOMEM;
+			rc = -ENOMEM;
 			goto out_err;
 		}
-		ret = crypto_register_skcipher(&ctr_paes_alg);
-		if (ret)
+		rc = crypto_engine_register_skcipher(&ctr_paes_alg);
+		if (rc)
 			goto out_err;
+		pr_debug("%s registered\n", ctr_paes_alg.base.base.cra_driver_name);
 	}
 
 	return 0;
+
 out_err:
 	paes_s390_fini();
-	return ret;
+	return rc;
 }
 
 module_init(paes_s390_init);
 module_exit(paes_s390_fini);
 
-MODULE_ALIAS_CRYPTO("paes");
+MODULE_ALIAS_CRYPTO("ecb(paes)");
+MODULE_ALIAS_CRYPTO("cbc(paes)");
+MODULE_ALIAS_CRYPTO("ctr(paes)");
+MODULE_ALIAS_CRYPTO("xts(paes)");
 
 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm with protected keys");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index ae382bafc772..2becd77df741 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -679,7 +679,7 @@ static ssize_t prng_chunksize_show(struct device *dev,
 				   struct device_attribute *attr,
 				   char *buf)
 {
-	return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+	return sysfs_emit(buf, "%u\n", prng_chunk_size);
 }
 static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
 
@@ -698,7 +698,7 @@ static ssize_t prng_counter_show(struct device *dev,
 		counter = prng_data->prngws.byte_counter;
 	mutex_unlock(&prng_data->mutex);
 
-	return scnprintf(buf, PAGE_SIZE, "%llu\n", counter);
+	return sysfs_emit(buf, "%llu\n", counter);
 }
 static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
 
@@ -707,7 +707,7 @@ static ssize_t prng_errorflag_show(struct device *dev,
 				   struct device_attribute *attr,
 				   char *buf)
 {
-	return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+	return sysfs_emit(buf, "%d\n", prng_errorflag);
 }
 static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
 
@@ -717,9 +717,9 @@ static ssize_t prng_mode_show(struct device *dev,
 			      char *buf)
 {
 	if (prng_mode == PRNG_MODE_TDES)
-		return scnprintf(buf, PAGE_SIZE, "TDES\n");
+		return sysfs_emit(buf, "TDES\n");
 	else
-		return scnprintf(buf, PAGE_SIZE, "SHA512\n");
+		return sysfs_emit(buf, "SHA512\n");
 }
 static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
 
@@ -742,7 +742,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev,
 				      struct device_attribute *attr,
 				      char *buf)
 {
-	return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+	return sysfs_emit(buf, "%u\n", prng_reseed_limit);
 }
 static ssize_t prng_reseed_limit_store(struct device *dev,
 				       struct device_attribute *attr,
@@ -773,7 +773,7 @@ static ssize_t prng_strength_show(struct device *dev,
 				  struct device_attribute *attr,
 				  char *buf)
 {
-	return scnprintf(buf, PAGE_SIZE, "256\n");
+	return sysfs_emit(buf, "256\n");
 }
 static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
 
@@ -907,5 +907,5 @@ static void __exit prng_exit(void)
 	}
 }
 
-module_cpu_feature_match(MSA, prng_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, prng_init);
 module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index 65ea12fc87a1..d757ccbce2b4 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -10,26 +10,33 @@
 #ifndef _CRYPTO_ARCH_S390_SHA_H
 #define _CRYPTO_ARCH_S390_SHA_H
 
-#include <linux/crypto.h>
-#include <crypto/sha1.h>
 #include <crypto/sha2.h>
 #include <crypto/sha3.h>
+#include <linux/types.h>
 
 /* must be big enough for the largest SHA variant */
-#define SHA3_STATE_SIZE			200
 #define CPACF_MAX_PARMBLOCK_SIZE	SHA3_STATE_SIZE
 #define SHA_MAX_BLOCK_SIZE		SHA3_224_BLOCK_SIZE
+#define S390_SHA_CTX_SIZE		sizeof(struct s390_sha_ctx)
 
 struct s390_sha_ctx {
 	u64 count;		/* message length in bytes */
-	u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
-	u8 buf[SHA_MAX_BLOCK_SIZE];
+	union {
+		u32 state[CPACF_MAX_PARMBLOCK_SIZE / sizeof(u32)];
+		struct {
+			u64 state[SHA512_DIGEST_SIZE / sizeof(u64)];
+			u64 count_hi;
+		} sha512;
+	};
 	int func;		/* KIMD function to use */
+	bool first_message_part;
 };
 
 struct shash_desc;
 
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
-int s390_sha_final(struct shash_desc *desc, u8 *out);
+int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data,
+			   unsigned int len);
+int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+		   u8 *out);
 
 #endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index a3fabf310a38..d229cbd2ba22 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -18,12 +18,12 @@
  *   Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
  *   Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
  */
+#include <asm/cpacf.h>
 #include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
 #include <crypto/sha1.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 
 #include "sha.h"
 
@@ -49,7 +49,6 @@ static int s390_sha1_export(struct shash_desc *desc, void *out)
 
 	octx->count = sctx->count;
 	memcpy(octx->state, sctx->state, sizeof(octx->state));
-	memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer));
 	return 0;
 }
 
@@ -60,7 +59,8 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in)
 
 	sctx->count = ictx->count;
 	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
-	memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
 	sctx->func = CPACF_KIMD_SHA_1;
+	/* Read by the shared update/finup code, so it must be initialized. */
+	sctx->first_message_part = 0;
 	return 0;
 }
@@ -68,16 +66,18 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in)
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	s390_sha1_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	s390_sha1_export,
 	.import		=	s390_sha1_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha1_state),
+	.descsize	=	S390_SHA_CTX_SIZE,
+	.statesize	=	SHA1_STATE_SIZE,
 	.base		=	{
 		.cra_name	=	"sha1",
 		.cra_driver_name=	"sha1-s390",
 		.cra_priority	=	300,
+		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					CRYPTO_AHASH_ALG_FINUP_MAX,
 		.cra_blocksize	=	SHA1_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
@@ -95,7 +95,7 @@ static void __exit sha1_s390_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_cpu_feature_match(MSA, sha1_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
 module_exit(sha1_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
deleted file mode 100644
index 24983f175676..000000000000
--- a/arch/s390/crypto/sha256_s390.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Cryptographic API.
- *
- * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
- *
- * s390 Version:
- *   Copyright IBM Corp. 2005, 2011
- *   Author(s): Jan Glauber (jang@de.ibm.com)
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <crypto/sha2.h>
-#include <asm/cpacf.h>
-
-#include "sha.h"
-
-static int s390_sha256_init(struct shash_desc *desc)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-	sctx->func = CPACF_KIMD_SHA_256;
-
-	return 0;
-}
-
-static int sha256_export(struct shash_desc *desc, void *out)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	struct sha256_state *octx = out;
-
-	octx->count = sctx->count;
-	memcpy(octx->state, sctx->state, sizeof(octx->state));
-	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
-	return 0;
-}
-
-static int sha256_import(struct shash_desc *desc, const void *in)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha256_state *ictx = in;
-
-	sctx->count = ictx->count;
-	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
-	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
-	sctx->func = CPACF_KIMD_SHA_256;
-	return 0;
-}
-
-static struct shash_alg sha256_alg = {
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	s390_sha256_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
-	.export		=	sha256_export,
-	.import		=	sha256_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha256_state),
-	.base		=	{
-		.cra_name	=	"sha256",
-		.cra_driver_name=	"sha256-s390",
-		.cra_priority	=	300,
-		.cra_blocksize	=	SHA256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static int s390_sha224_init(struct shash_desc *desc)
-{
-	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA224_H0;
-	sctx->state[1] = SHA224_H1;
-	sctx->state[2] = SHA224_H2;
-	sctx->state[3] = SHA224_H3;
-	sctx->state[4] = SHA224_H4;
-	sctx->state[5] = SHA224_H5;
-	sctx->state[6] = SHA224_H6;
-	sctx->state[7] = SHA224_H7;
-	sctx->count = 0;
-	sctx->func = CPACF_KIMD_SHA_256;
-
-	return 0;
-}
-
-static struct shash_alg sha224_alg = {
-	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	s390_sha224_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
-	.export		=	sha256_export,
-	.import		=	sha256_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha256_state),
-	.base		=	{
-		.cra_name	=	"sha224",
-		.cra_driver_name=	"sha224-s390",
-		.cra_priority	=	300,
-		.cra_blocksize	=	SHA224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static int __init sha256_s390_init(void)
-{
-	int ret;
-
-	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
-		return -ENODEV;
-	ret = crypto_register_shash(&sha256_alg);
-	if (ret < 0)
-		goto out;
-	ret = crypto_register_shash(&sha224_alg);
-	if (ret < 0)
-		crypto_unregister_shash(&sha256_alg);
-out:
-	return ret;
-}
-
-static void __exit sha256_s390_fini(void)
-{
-	crypto_unregister_shash(&sha224_alg);
-	crypto_unregister_shash(&sha256_alg);
-}
-
-module_cpu_feature_match(MSA, sha256_s390_init);
-module_exit(sha256_s390_fini);
-
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index 30ac49b635bf..4a7731ac6bcd 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -8,12 +8,14 @@
  *   Copyright IBM Corp. 2019
  *   Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
  */
+#include <asm/cpacf.h>
 #include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
 #include <crypto/sha3.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
 
 #include "sha.h"
 
@@ -21,7 +23,9 @@ static int sha3_256_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
-	memset(sctx->state, 0, sizeof(sctx->state));
+	sctx->first_message_part = test_facility(86);
+	if (!sctx->first_message_part)
+		memset(sctx->state, 0, sizeof(sctx->state));
 	sctx->count = 0;
 	sctx->func = CPACF_KIMD_SHA3_256;
 
@@ -33,10 +37,11 @@ static int sha3_256_export(struct shash_desc *desc, void *out)
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	struct sha3_state *octx = out;
 
-	octx->rsiz = sctx->count;
+	if (sctx->first_message_part) {
+		memset(sctx->state, 0, sizeof(sctx->state));
+		sctx->first_message_part = 0;
+	}
 	memcpy(octx->st, sctx->state, sizeof(octx->st));
-	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
-
 	return 0;
 }
 
@@ -45,9 +50,9 @@ static int sha3_256_import(struct shash_desc *desc, const void *in)
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	const struct sha3_state *ictx = in;
 
-	sctx->count = ictx->rsiz;
+	sctx->count = 0;
 	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
-	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->first_message_part = 0;
 	sctx->func = CPACF_KIMD_SHA3_256;
 
 	return 0;
@@ -56,29 +61,26 @@ static int sha3_256_import(struct shash_desc *desc, const void *in)
 static int sha3_224_import(struct shash_desc *desc, const void *in)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha3_state *ictx = in;
 
-	sctx->count = ictx->rsiz;
-	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
-	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sha3_256_import(desc, in);
 	sctx->func = CPACF_KIMD_SHA3_224;
-
 	return 0;
 }
 
 static struct shash_alg sha3_256_alg = {
 	.digestsize	=	SHA3_256_DIGEST_SIZE,	   /* = 32 */
 	.init		=	sha3_256_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	sha3_256_export,
 	.import		=	sha3_256_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha3_state),
+	.descsize	=	S390_SHA_CTX_SIZE,
+	.statesize	=	SHA3_STATE_SIZE,
 	.base		=	{
 		.cra_name	 =	"sha3-256",
 		.cra_driver_name =	"sha3-256-s390",
 		.cra_priority	 =	300,
+		.cra_flags	 =	CRYPTO_AHASH_ALG_BLOCK_ONLY,
 		.cra_blocksize	 =	SHA3_256_BLOCK_SIZE,
 		.cra_module	 =	THIS_MODULE,
 	}
@@ -88,26 +90,25 @@ static int sha3_224_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
-	memset(sctx->state, 0, sizeof(sctx->state));
-	sctx->count = 0;
+	sha3_256_init(desc);
 	sctx->func = CPACF_KIMD_SHA3_224;
-
 	return 0;
 }
 
 static struct shash_alg sha3_224_alg = {
 	.digestsize	=	SHA3_224_DIGEST_SIZE,
 	.init		=	sha3_224_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	sha3_256_export, /* same as for 256 */
 	.import		=	sha3_224_import, /* function code different! */
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha3_state),
+	.descsize	=	S390_SHA_CTX_SIZE,
+	.statesize	=	SHA3_STATE_SIZE,
 	.base		=	{
 		.cra_name	 =	"sha3-224",
 		.cra_driver_name =	"sha3-224-s390",
 		.cra_priority	 =	300,
+		.cra_flags	 =	CRYPTO_AHASH_ALG_BLOCK_ONLY,
 		.cra_blocksize	 =	SHA3_224_BLOCK_SIZE,
 		.cra_module	 =	THIS_MODULE,
 	}
@@ -137,7 +138,7 @@ static void __exit sha3_256_s390_fini(void)
 	crypto_unregister_shash(&sha3_256_alg);
 }
 
-module_cpu_feature_match(MSA, sha3_256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
 module_exit(sha3_256_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha3-256");
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index e70d50f7620f..018f02fff444 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -7,12 +7,14 @@
  * Copyright IBM Corp. 2019
  * Author(s): Joerg Schmidbauer (jschmidb@de.ibm.com)
  */
+#include <asm/cpacf.h>
 #include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/cpufeature.h>
 #include <crypto/sha3.h>
-#include <asm/cpacf.h>
+#include <linux/cpufeature.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
 
 #include "sha.h"
 
@@ -20,7 +22,9 @@ static int sha3_512_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
-	memset(sctx->state, 0, sizeof(sctx->state));
+	sctx->first_message_part = test_facility(86);
+	if (!sctx->first_message_part)
+		memset(sctx->state, 0, sizeof(sctx->state));
 	sctx->count = 0;
 	sctx->func = CPACF_KIMD_SHA3_512;
 
@@ -32,12 +36,12 @@ static int sha3_512_export(struct shash_desc *desc, void *out)
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	struct sha3_state *octx = out;
 
-	octx->rsiz = sctx->count;
-	octx->rsizw = sctx->count >> 32;
 
+	if (sctx->first_message_part) {
+		memset(sctx->state, 0, sizeof(sctx->state));
+		sctx->first_message_part = 0;
+	}
 	memcpy(octx->st, sctx->state, sizeof(octx->st));
-	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
-
 	return 0;
 }
 
@@ -46,12 +50,9 @@ static int sha3_512_import(struct shash_desc *desc, const void *in)
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	const struct sha3_state *ictx = in;
 
-	if (unlikely(ictx->rsizw))
-		return -ERANGE;
-	sctx->count = ictx->rsiz;
-
+	sctx->count = 0;
 	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
-	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->first_message_part = 0;
 	sctx->func = CPACF_KIMD_SHA3_512;
 
 	return 0;
@@ -60,32 +61,26 @@ static int sha3_512_import(struct shash_desc *desc, const void *in)
 static int sha3_384_import(struct shash_desc *desc, const void *in)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
-	const struct sha3_state *ictx = in;
-
-	if (unlikely(ictx->rsizw))
-		return -ERANGE;
-	sctx->count = ictx->rsiz;
 
-	memcpy(sctx->state, ictx->st, sizeof(ictx->st));
-	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sha3_512_import(desc, in);
 	sctx->func = CPACF_KIMD_SHA3_384;
-
 	return 0;
 }
 
 static struct shash_alg sha3_512_alg = {
 	.digestsize	=	SHA3_512_DIGEST_SIZE,
 	.init		=	sha3_512_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	sha3_512_export,
 	.import		=	sha3_512_import,
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha3_state),
+	.descsize	=	S390_SHA_CTX_SIZE,
+	.statesize	=	SHA3_STATE_SIZE,
 	.base		=	{
 		.cra_name	 =	"sha3-512",
 		.cra_driver_name =	"sha3-512-s390",
 		.cra_priority	 =	300,
+		.cra_flags	 =	CRYPTO_AHASH_ALG_BLOCK_ONLY,
 		.cra_blocksize	 =	SHA3_512_BLOCK_SIZE,
 		.cra_module	 =	THIS_MODULE,
 	}
@@ -97,26 +92,25 @@ static int sha3_384_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
-	memset(sctx->state, 0, sizeof(sctx->state));
-	sctx->count = 0;
+	sha3_512_init(desc);
 	sctx->func = CPACF_KIMD_SHA3_384;
-
 	return 0;
 }
 
 static struct shash_alg sha3_384_alg = {
 	.digestsize	=	SHA3_384_DIGEST_SIZE,
 	.init		=	sha3_384_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	sha3_512_export, /* same as for 512 */
 	.import		=	sha3_384_import, /* function code different! */
-	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha3_state),
+	.descsize	=	S390_SHA_CTX_SIZE,
+	.statesize	=	SHA3_STATE_SIZE,
 	.base		=	{
 		.cra_name	 =	"sha3-384",
 		.cra_driver_name =	"sha3-384-s390",
 		.cra_priority	 =	300,
+		.cra_flags	 =	CRYPTO_AHASH_ALG_BLOCK_ONLY,
 		.cra_blocksize	 =	SHA3_384_BLOCK_SIZE,
 		.cra_ctxsize	 =	sizeof(struct s390_sha_ctx),
 		.cra_module	 =	THIS_MODULE,
@@ -147,7 +141,7 @@ static void __exit fini(void)
 	crypto_unregister_shash(&sha3_384_alg);
 }
 
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 43ce4956df73..33711a29618c 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -7,14 +7,13 @@
  * Copyright IBM Corp. 2007
  * Author(s): Jan Glauber (jang@de.ibm.com)
  */
+#include <asm/cpacf.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha2.h>
+#include <linux/cpufeature.h>
 #include <linux/errno.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <asm/cpacf.h>
 
 #include "sha.h"
 
@@ -22,15 +21,18 @@ static int sha512_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 
-	*(__u64 *)&ctx->state[0] = SHA512_H0;
-	*(__u64 *)&ctx->state[2] = SHA512_H1;
-	*(__u64 *)&ctx->state[4] = SHA512_H2;
-	*(__u64 *)&ctx->state[6] = SHA512_H3;
-	*(__u64 *)&ctx->state[8] = SHA512_H4;
-	*(__u64 *)&ctx->state[10] = SHA512_H5;
-	*(__u64 *)&ctx->state[12] = SHA512_H6;
-	*(__u64 *)&ctx->state[14] = SHA512_H7;
+	ctx->sha512.state[0] = SHA512_H0;
+	ctx->sha512.state[1] = SHA512_H1;
+	ctx->sha512.state[2] = SHA512_H2;
+	ctx->sha512.state[3] = SHA512_H3;
+	ctx->sha512.state[4] = SHA512_H4;
+	ctx->sha512.state[5] = SHA512_H5;
+	ctx->sha512.state[6] = SHA512_H6;
+	ctx->sha512.state[7] = SHA512_H7;
 	ctx->count = 0;
+	ctx->sha512.count_hi = 0;
 	ctx->func = CPACF_KIMD_SHA_512;
+	/* Read by the shared update/finup code, so it must be initialized. */
+	ctx->first_message_part = 0;
 
 	return 0;
@@ -42,9 +42,8 @@ static int sha512_export(struct shash_desc *desc, void *out)
 	struct sha512_state *octx = out;
 
 	octx->count[0] = sctx->count;
-	octx->count[1] = 0;
+	octx->count[1] = sctx->sha512.count_hi;
 	memcpy(octx->state, sctx->state, sizeof(octx->state));
-	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
 	return 0;
 }
 
@@ -53,12 +52,12 @@ static int sha512_import(struct shash_desc *desc, const void *in)
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	const struct sha512_state *ictx = in;
 
-	if (unlikely(ictx->count[1]))
-		return -ERANGE;
 	sctx->count = ictx->count[0];
+	sctx->sha512.count_hi = ictx->count[1];
 
 	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
-	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
 	sctx->func = CPACF_KIMD_SHA_512;
+	/* Read by the shared update/finup code, so it must be initialized. */
+	sctx->first_message_part = 0;
 	return 0;
 }
@@ -66,16 +63,18 @@ static int sha512_import(struct shash_desc *desc, const void *in)
 static struct shash_alg sha512_alg = {
 	.digestsize	=	SHA512_DIGEST_SIZE,
 	.init		=	sha512_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	sha512_export,
 	.import		=	sha512_import,
 	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha512_state),
+	.statesize	=	SHA512_STATE_SIZE,
 	.base		=	{
 		.cra_name	=	"sha512",
 		.cra_driver_name=	"sha512-s390",
 		.cra_priority	=	300,
+		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					CRYPTO_AHASH_ALG_FINUP_MAX,
 		.cra_blocksize	=	SHA512_BLOCK_SIZE,
 		.cra_module	=	THIS_MODULE,
 	}
@@ -87,15 +86,18 @@ static int sha384_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 
-	*(__u64 *)&ctx->state[0] = SHA384_H0;
-	*(__u64 *)&ctx->state[2] = SHA384_H1;
-	*(__u64 *)&ctx->state[4] = SHA384_H2;
-	*(__u64 *)&ctx->state[6] = SHA384_H3;
-	*(__u64 *)&ctx->state[8] = SHA384_H4;
-	*(__u64 *)&ctx->state[10] = SHA384_H5;
-	*(__u64 *)&ctx->state[12] = SHA384_H6;
-	*(__u64 *)&ctx->state[14] = SHA384_H7;
+	ctx->sha512.state[0] = SHA384_H0;
+	ctx->sha512.state[1] = SHA384_H1;
+	ctx->sha512.state[2] = SHA384_H2;
+	ctx->sha512.state[3] = SHA384_H3;
+	ctx->sha512.state[4] = SHA384_H4;
+	ctx->sha512.state[5] = SHA384_H5;
+	ctx->sha512.state[6] = SHA384_H6;
+	ctx->sha512.state[7] = SHA384_H7;
 	ctx->count = 0;
+	ctx->sha512.count_hi = 0;
 	ctx->func = CPACF_KIMD_SHA_512;
+	/* Read by the shared update/finup code, so it must be initialized. */
+	ctx->first_message_part = 0;
 
 	return 0;
@@ -104,17 +104,19 @@ static int sha384_init(struct shash_desc *desc)
 static struct shash_alg sha384_alg = {
 	.digestsize	=	SHA384_DIGEST_SIZE,
 	.init		=	sha384_init,
-	.update		=	s390_sha_update,
-	.final		=	s390_sha_final,
+	.update		=	s390_sha_update_blocks,
+	.finup		=	s390_sha_finup,
 	.export		=	sha512_export,
 	.import		=	sha512_import,
 	.descsize	=	sizeof(struct s390_sha_ctx),
-	.statesize	=	sizeof(struct sha512_state),
+	.statesize	=	SHA512_STATE_SIZE,
 	.base		=	{
 		.cra_name	=	"sha384",
 		.cra_driver_name=	"sha384-s390",
 		.cra_priority	=	300,
 		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					CRYPTO_AHASH_ALG_FINUP_MAX,
 		.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
 		.cra_module	=	THIS_MODULE,
 	}
@@ -142,7 +144,7 @@ static void __exit fini(void)
 	crypto_unregister_shash(&sha384_alg);
 }
 
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 686fe7aa192f..b5e2c365ea05 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -13,42 +13,33 @@
 #include <asm/cpacf.h>
 #include "sha.h"
 
-int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+int s390_sha_update_blocks(struct shash_desc *desc, const u8 *data,
+			   unsigned int len)
 {
-	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
-	unsigned int index, n;
-
-	/* how much is already in the buffer? */
-	index = ctx->count % bsize;
-	ctx->count += len;
-
-	if ((index + len) < bsize)
-		goto store;
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int n;
+	int fc;
 
-	/* process one stored block */
-	if (index) {
-		memcpy(ctx->buf + index, data, bsize - index);
-		cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
-		data += bsize - index;
-		len -= bsize - index;
-		index = 0;
-	}
+	fc = ctx->func;
+	if (ctx->first_message_part)
+		fc |= CPACF_KIMD_NIP;
 
 	/* process as many blocks as possible */
-	if (len >= bsize) {
-		n = (len / bsize) * bsize;
-		cpacf_kimd(ctx->func, ctx->state, data, n);
-		data += n;
-		len -= n;
+	n = (len / bsize) * bsize;
+	ctx->count += n;
+	switch (ctx->func) {
+	case CPACF_KLMD_SHA_512:
+	case CPACF_KLMD_SHA3_384:
+		if (ctx->count < n)
+			ctx->sha512.count_hi++;
+		break;
 	}
-store:
-	if (len)
-		memcpy(ctx->buf + index , data, len);
-
-	return 0;
+	cpacf_kimd(fc, ctx->state, data, n);
+	ctx->first_message_part = 0;
+	return len - n;
 }
-EXPORT_SYMBOL_GPL(s390_sha_update);
+EXPORT_SYMBOL_GPL(s390_sha_update_blocks);
 
 static int s390_crypto_shash_parmsize(int func)
 {
@@ -69,15 +60,15 @@ static int s390_crypto_shash_parmsize(int func)
 	}
 }
 
-int s390_sha_final(struct shash_desc *desc, u8 *out)
+int s390_sha_finup(struct shash_desc *desc, const u8 *src, unsigned int len,
+		   u8 *out)
 {
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
-	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+	int mbl_offset, fc;
 	u64 bits;
-	unsigned int n;
-	int mbl_offset;
 
-	n = ctx->count % bsize;
+	ctx->count += len;
+
 	bits = ctx->count * 8;
 	mbl_offset = s390_crypto_shash_parmsize(ctx->func);
 	if (mbl_offset < 0)
@@ -87,17 +78,16 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
 
 	/* set total msg bit length (mbl) in CPACF parmblock */
 	switch (ctx->func) {
-	case CPACF_KLMD_SHA_1:
-	case CPACF_KLMD_SHA_256:
-		memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
-		break;
 	case CPACF_KLMD_SHA_512:
-		/*
-		 * the SHA512 parmblock has a 128-bit mbl field, clear
-		 * high-order u64 field, copy bits to low-order u64 field
-		 */
-		memset(ctx->state + mbl_offset, 0x00, sizeof(bits));
+		/* The SHA512 parmblock has a 128-bit mbl field. */
+		if (ctx->count < len)
+			ctx->sha512.count_hi++;
+		ctx->sha512.count_hi <<= 3;
+		ctx->sha512.count_hi |= ctx->count >> 61;
 		mbl_offset += sizeof(u64) / sizeof(u32);
+		fallthrough;
+	case CPACF_KLMD_SHA_1:
+	case CPACF_KLMD_SHA_256:
 		memcpy(ctx->state + mbl_offset, &bits, sizeof(bits));
 		break;
 	case CPACF_KLMD_SHA3_224:
@@ -109,16 +99,18 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
 		return -EINVAL;
 	}
 
-	cpacf_klmd(ctx->func, ctx->state, ctx->buf, n);
+	fc = ctx->func;
+	fc |= test_facility(86) ? CPACF_KLMD_DUFOP : 0;
+	if (ctx->first_message_part)
+		fc |= CPACF_KLMD_NIP;
+	cpacf_klmd(fc, ctx->state, src, len);
 
 	/* copy digest to out */
 	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
-	/* wipe context */
-	memset(ctx, 0, sizeof *ctx);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(s390_sha_final);
+EXPORT_SYMBOL_GPL(s390_sha_finup);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 06f601509ce9..c34854d298f8 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -3,7 +3,12 @@
 # Makefile for the linux hypfs filesystem routines.
 #
 
-obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_dbfs.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_diag.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_sprp.o
+obj-$(CONFIG_S390_HYPFS)	+= hypfs_vm.o
 
-s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
-s390_hypfs-objs += hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs_diag_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs_vm_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)	+= inode.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index 05f3f9aee5fc..83ebf54cca6b 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -46,6 +46,15 @@ void hypfs_diag0c_exit(void);
 void hypfs_sprp_init(void);
 void hypfs_sprp_exit(void);
 
+int __hypfs_fs_init(void);
+
+static inline int hypfs_fs_init(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		return __hypfs_fs_init();
+	return 0;
+}
+
 /* debugfs interface */
 struct hypfs_dbfs_file;
 
@@ -69,8 +78,6 @@ struct hypfs_dbfs_file {
 	struct dentry		*dentry;
 };
 
-extern void hypfs_dbfs_init(void);
-extern void hypfs_dbfs_exit(void);
 extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
 extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
 
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index f4c7dbfaf8ee..5d9effb0867c 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -39,7 +39,9 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
 		return 0;
 
 	df = file_inode(file)->i_private;
-	mutex_lock(&df->lock);
+	if (mutex_lock_interruptible(&df->lock))
+		return -ERESTARTSYS;
+
 	data = hypfs_dbfs_data_alloc(df);
 	if (!data) {
 		mutex_unlock(&df->lock);
@@ -74,7 +76,6 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 static const struct file_operations dbfs_ops = {
 	.read		= dbfs_read,
-	.llseek		= no_llseek,
 	.unlocked_ioctl = dbfs_ioctl,
 };
 
@@ -90,12 +91,33 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
 	debugfs_remove(df->dentry);
 }
 
-void hypfs_dbfs_init(void)
+static int __init hypfs_dbfs_init(void)
 {
-	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
-}
+	int rc = -ENODATA;
 
-void hypfs_dbfs_exit(void)
-{
+	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+	if (hypfs_diag_init())
+		goto fail_dbfs_exit;
+	if (hypfs_vm_init())
+		goto fail_hypfs_diag_exit;
+	hypfs_sprp_init();
+	if (hypfs_diag0c_init())
+		goto fail_hypfs_sprp_exit;
+	rc = hypfs_fs_init();
+	if (rc)
+		goto fail_hypfs_diag0c_exit;
+	return 0;
+
+fail_hypfs_diag0c_exit:
+	hypfs_diag0c_exit();
+fail_hypfs_sprp_exit:
+	hypfs_sprp_exit();
+	hypfs_vm_exit();
+fail_hypfs_diag_exit:
+	hypfs_diag_exit();
+	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+fail_dbfs_exit:
 	debugfs_remove(dbfs_dir);
+	return rc;
 }
+device_initcall(hypfs_dbfs_init)
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index f0bc4dc3e9bf..c8af67d20994 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -18,196 +18,25 @@
 #include <linux/mm.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
+#include "hypfs_diag.h"
 #include "hypfs.h"
 
-#define TMP_SIZE 64		/* size of temporary buffers */
-
 #define DBFS_D204_HDR_VERSION	0
 
-static char *diag224_cpu_names;			/* diag 224 name table */
 static enum diag204_sc diag204_store_sc;	/* used subcode for store */
 static enum diag204_format diag204_info_type;	/* used diag 204 data format */
 
 static void *diag204_buf;		/* 4K aligned buffer for diag204 data */
-static void *diag204_buf_vmalloc;	/* vmalloc pointer for diag204 data */
 static int diag204_buf_pages;		/* number of pages for diag204 data */
 
-static struct dentry *dbfs_d204_file;
-
-/*
- * DIAG 204 member access functions.
- *
- * Since we have two different diag 204 data formats for old and new s390
- * machines, we do not access the structs directly, but use getter functions for
- * each struct member instead. This should make the code more readable.
- */
-
-/* Time information block */
-
-static inline int info_blk_hdr__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_info_blk_hdr);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_info_blk_hdr);
-}
-
-static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+enum diag204_format diag204_get_info_type(void)
 {
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_info_blk_hdr *)hdr)->npar;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
+	return diag204_info_type;
 }
 
-static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+static void diag204_set_info_type(enum diag204_format type)
 {
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_info_blk_hdr *)hdr)->flags;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
-}
-
-static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus;
-}
-
-/* Partition header */
-
-static inline int part_hdr__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_part_hdr);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_part_hdr);
-}
-
-static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_part_hdr *)hdr)->cpus;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_part_hdr *)hdr)->rcpus;
-}
-
-static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
-				       char *name)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
-		       DIAG204_LPAR_NAME_LEN);
-	else /* DIAG204_INFO_EXT */
-		memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
-		       DIAG204_LPAR_NAME_LEN);
-	EBCASC(name, DIAG204_LPAR_NAME_LEN);
-	name[DIAG204_LPAR_NAME_LEN] = 0;
-	strim(name);
-}
-
-/* CPU info block */
-
-static inline int cpu_info__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_cpu_info);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_cpu_info);
-}
-
-static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->ctidx;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->ctidx;
-}
-
-static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->cpu_addr;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
-}
-
-static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->acc_time;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->acc_time;
-}
-
-static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_cpu_info *)hdr)->lp_time;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->lp_time;
-}
-
-static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return 0;	/* online_time not available in simple info */
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_cpu_info *)hdr)->online_time;
-}
-
-/* Physical header */
-
-static inline int phys_hdr__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_phys_hdr);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_phys_hdr);
-}
-
-static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_hdr *)hdr)->cpus;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_hdr *)hdr)->cpus;
-}
-
-/* Physical CPU info block */
-
-static inline int phys_cpu__size(enum diag204_format type)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return sizeof(struct diag204_phys_cpu);
-	else /* DIAG204_INFO_EXT */
-		return sizeof(struct diag204_x_phys_cpu);
-}
-
-static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
-}
-
-static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_cpu *)hdr)->mgm_time;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
-}
-
-static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
-{
-	if (type == DIAG204_INFO_SIMPLE)
-		return ((struct diag204_phys_cpu *)hdr)->ctidx;
-	else /* DIAG204_INFO_EXT */
-		return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+	diag204_info_type = type;
 }
 
 /* Diagnose 204 functions */
@@ -220,43 +49,11 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
 
 static void diag204_free_buffer(void)
 {
-	if (!diag204_buf)
-		return;
-	if (diag204_buf_vmalloc) {
-		vfree(diag204_buf_vmalloc);
-		diag204_buf_vmalloc = NULL;
-	} else {
-		free_pages((unsigned long) diag204_buf, 0);
-	}
+	vfree(diag204_buf);
 	diag204_buf = NULL;
 }
 
-static void *page_align_ptr(void *ptr)
-{
-	return (void *) PAGE_ALIGN((unsigned long) ptr);
-}
-
-static void *diag204_alloc_vbuf(int pages)
-{
-	/* The buffer has to be page aligned! */
-	diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1)));
-	if (!diag204_buf_vmalloc)
-		return ERR_PTR(-ENOMEM);
-	diag204_buf = page_align_ptr(diag204_buf_vmalloc);
-	diag204_buf_pages = pages;
-	return diag204_buf;
-}
-
-static void *diag204_alloc_rbuf(void)
-{
-	diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
-	if (!diag204_buf)
-		return ERR_PTR(-ENOMEM);
-	diag204_buf_pages = 1;
-	return diag204_buf;
-}
-
-static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 {
 	if (diag204_buf) {
 		*pages = diag204_buf_pages;
@@ -264,15 +61,19 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 	}
 	if (fmt == DIAG204_INFO_SIMPLE) {
 		*pages = 1;
-		return diag204_alloc_rbuf();
 	} else {/* DIAG204_INFO_EXT */
 		*pages = diag204((unsigned long)DIAG204_SUBC_RSI |
 				 (unsigned long)DIAG204_INFO_EXT, 0, NULL);
 		if (*pages <= 0)
-			return ERR_PTR(-ENOSYS);
-		else
-			return diag204_alloc_vbuf(*pages);
+			return ERR_PTR(-EOPNOTSUPP);
 	}
+	diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
+				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     __builtin_return_address(0));
+	if (!diag204_buf)
+		return ERR_PTR(-ENOMEM);
+	diag204_buf_pages = *pages;
+	return diag204_buf;
 }
 
 /*
@@ -299,13 +100,13 @@ static int diag204_probe(void)
 		if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
 			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = DIAG204_SUBC_STIB7;
-			diag204_info_type = DIAG204_INFO_EXT;
+			diag204_set_info_type(DIAG204_INFO_EXT);
 			goto out;
 		}
 		if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
 			    (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
 			diag204_store_sc = DIAG204_SUBC_STIB6;
-			diag204_info_type = DIAG204_INFO_EXT;
+			diag204_set_info_type(DIAG204_INFO_EXT);
 			goto out;
 		}
 		diag204_free_buffer();
@@ -321,10 +122,10 @@ static int diag204_probe(void)
 	if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
 		    (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
 		diag204_store_sc = DIAG204_SUBC_STIB4;
-		diag204_info_type = DIAG204_INFO_SIMPLE;
+		diag204_set_info_type(DIAG204_INFO_SIMPLE);
 		goto out;
 	} else {
-		rc = -ENOSYS;
+		rc = -EOPNOTSUPP;
 		goto fail_store;
 	}
 out:
@@ -335,58 +136,24 @@ fail_alloc:
 	return rc;
 }
 
-static int diag204_do_store(void *buf, int pages)
+int diag204_store(void *buf, int pages)
 {
+	unsigned long subcode;
 	int rc;
 
-	rc = diag204((unsigned long) diag204_store_sc |
-		     (unsigned long) diag204_info_type, pages, buf);
-	return rc < 0 ? -ENOSYS : 0;
-}
-
-static void *diag204_store(void)
-{
-	void *buf;
-	int pages, rc;
-
-	buf = diag204_get_buffer(diag204_info_type, &pages);
-	if (IS_ERR(buf))
-		goto out;
-	rc = diag204_do_store(buf, pages);
-	if (rc)
-		return ERR_PTR(rc);
-out:
-	return buf;
-}
-
-/* Diagnose 224 functions */
-
-static int diag224_get_name_table(void)
-{
-	/* memory must be below 2GB */
-	diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
-	if (!diag224_cpu_names)
-		return -ENOMEM;
-	if (diag224(diag224_cpu_names)) {
-		free_page((unsigned long) diag224_cpu_names);
-		return -EOPNOTSUPP;
+	subcode = diag204_get_info_type();
+	subcode |= diag204_store_sc;
+	if (diag204_has_bif())
+		subcode |= DIAG204_BIF_BIT;
+	while (1) {
+		rc = diag204(subcode, pages, buf);
+		if (rc != -EBUSY)
+			break;
+		if (signal_pending(current))
+			return -ERESTARTSYS;
+		schedule_timeout_interruptible(DIAG204_BUSY_WAIT);
 	}
-	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
-	return 0;
-}
-
-static void diag224_delete_name_table(void)
-{
-	free_page((unsigned long) diag224_cpu_names);
-}
-
-static int diag224_idx2name(int index, char *name)
-{
-	memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
-	       DIAG204_CPU_NAME_LEN);
-	name[DIAG204_CPU_NAME_LEN] = 0;
-	strim(name);
-	return 0;
+	return rc < 0 ? rc : 0;
 }
 
 struct dbfs_d204_hdr {
@@ -411,8 +178,8 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
 	base = vzalloc(buf_size);
 	if (!base)
 		return -ENOMEM;
-	d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
-	rc = diag204_do_store(d204->buf, diag204_buf_pages);
+	d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr);
+	rc = diag204_store(d204->buf, diag204_buf_pages);
 	if (rc) {
 		vfree(base);
 		return rc;
@@ -437,180 +204,22 @@ __init int hypfs_diag_init(void)
 	int rc;
 
 	if (diag204_probe()) {
-		pr_err("The hardware system does not support hypfs\n");
+		pr_info("The hardware system does not support hypfs\n");
 		return -ENODATA;
 	}
 
-	if (diag204_info_type == DIAG204_INFO_EXT)
+	if (diag204_get_info_type() == DIAG204_INFO_EXT)
 		hypfs_dbfs_create_file(&dbfs_file_d204);
 
-	if (MACHINE_IS_LPAR) {
-		rc = diag224_get_name_table();
-		if (rc) {
-			pr_err("The hardware system does not provide all "
-			       "functions required by hypfs\n");
-			debugfs_remove(dbfs_d204_file);
-			return rc;
-		}
-	}
-	return 0;
+	rc = hypfs_diag_fs_init();
+	if (rc)
+		pr_err("The hardware system does not provide all functions required by hypfs\n");
+	return rc;
 }
 
 void hypfs_diag_exit(void)
 {
-	debugfs_remove(dbfs_d204_file);
-	diag224_delete_name_table();
+	hypfs_diag_fs_exit();
 	diag204_free_buffer();
 	hypfs_dbfs_remove_file(&dbfs_file_d204);
 }
-
-/*
- * Functions to create the directory structure
- * *******************************************
- */
-
-static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
-{
-	struct dentry *cpu_dir;
-	char buffer[TMP_SIZE];
-	void *rc;
-
-	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
-							    cpu_info));
-	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
-	rc = hypfs_create_u64(cpu_dir, "mgmtime",
-			      cpu_info__acc_time(diag204_info_type, cpu_info) -
-			      cpu_info__lp_time(diag204_info_type, cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	rc = hypfs_create_u64(cpu_dir, "cputime",
-			      cpu_info__lp_time(diag204_info_type, cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	if (diag204_info_type == DIAG204_INFO_EXT) {
-		rc = hypfs_create_u64(cpu_dir, "onlinetime",
-				      cpu_info__online_time(diag204_info_type,
-							    cpu_info));
-		if (IS_ERR(rc))
-			return PTR_ERR(rc);
-	}
-	diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
-	rc = hypfs_create_str(cpu_dir, "type", buffer);
-	return PTR_ERR_OR_ZERO(rc);
-}
-
-static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
-{
-	struct dentry *cpus_dir;
-	struct dentry *lpar_dir;
-	char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
-	void *cpu_info;
-	int i;
-
-	part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
-	lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
-	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
-	if (IS_ERR(lpar_dir))
-		return lpar_dir;
-	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
-	if (IS_ERR(cpus_dir))
-		return cpus_dir;
-	cpu_info = part_hdr + part_hdr__size(diag204_info_type);
-	for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
-		int rc;
-		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
-		if (rc)
-			return ERR_PTR(rc);
-		cpu_info += cpu_info__size(diag204_info_type);
-	}
-	return cpu_info;
-}
-
-static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
-{
-	struct dentry *cpu_dir;
-	char buffer[TMP_SIZE];
-	void *rc;
-
-	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
-							    cpu_info));
-	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
-	if (IS_ERR(cpu_dir))
-		return PTR_ERR(cpu_dir);
-	rc = hypfs_create_u64(cpu_dir, "mgmtime",
-			      phys_cpu__mgm_time(diag204_info_type, cpu_info));
-	if (IS_ERR(rc))
-		return PTR_ERR(rc);
-	diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
-	rc = hypfs_create_str(cpu_dir, "type", buffer);
-	return PTR_ERR_OR_ZERO(rc);
-}
-
-static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
-{
-	int i;
-	void *cpu_info;
-	struct dentry *cpus_dir;
-
-	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
-	if (IS_ERR(cpus_dir))
-		return cpus_dir;
-	cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
-	for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
-		int rc;
-		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
-		if (rc)
-			return ERR_PTR(rc);
-		cpu_info += phys_cpu__size(diag204_info_type);
-	}
-	return cpu_info;
-}
-
-int hypfs_diag_create_files(struct dentry *root)
-{
-	struct dentry *systems_dir, *hyp_dir;
-	void *time_hdr, *part_hdr;
-	int i, rc;
-	void *buffer, *ptr;
-
-	buffer = diag204_store();
-	if (IS_ERR(buffer))
-		return PTR_ERR(buffer);
-
-	systems_dir = hypfs_mkdir(root, "systems");
-	if (IS_ERR(systems_dir)) {
-		rc = PTR_ERR(systems_dir);
-		goto err_out;
-	}
-	time_hdr = (struct x_info_blk_hdr *)buffer;
-	part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
-	for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
-		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
-		if (IS_ERR(part_hdr)) {
-			rc = PTR_ERR(part_hdr);
-			goto err_out;
-		}
-	}
-	if (info_blk_hdr__flags(diag204_info_type, time_hdr) &
-	    DIAG204_LPAR_PHYS_FLG) {
-		ptr = hypfs_create_phys_files(root, part_hdr);
-		if (IS_ERR(ptr)) {
-			rc = PTR_ERR(ptr);
-			goto err_out;
-		}
-	}
-	hyp_dir = hypfs_mkdir(root, "hyp");
-	if (IS_ERR(hyp_dir)) {
-		rc = PTR_ERR(hyp_dir);
-		goto err_out;
-	}
-	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
-	if (IS_ERR(ptr)) {
-		rc = PTR_ERR(ptr);
-		goto err_out;
-	}
-	rc = 0;
-
-err_out:
-	return rc;
-}
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
new file mode 100644
index 000000000000..7090eff27fef
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_DIAG_H_
+#define _S390_HYPFS_DIAG_H_
+
+#include <asm/diag.h>
+
+enum diag204_format diag204_get_info_type(void);
+void *diag204_get_buffer(enum diag204_format fmt, int *pages);
+int diag204_store(void *buf, int pages);
+
+int __hypfs_diag_fs_init(void);
+void __hypfs_diag_fs_exit(void);
+
+static inline int hypfs_diag_fs_init(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		return __hypfs_diag_fs_init();
+	return 0;
+}
+
+static inline void hypfs_diag_fs_exit(void)
+{
+	if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+		__hypfs_diag_fs_exit();
+}
+
+#endif /* _S390_HYPFS_DIAG_H_ */
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 9a2786079e3a..61220e717af0 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -9,6 +9,7 @@
 
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
 #include <asm/hypfs.h>
 #include "hypfs.h"
@@ -20,8 +21,7 @@
  */
 static void diag0c_fn(void *data)
 {
-	diag_stat_inc(DIAG_STAT_X00C);
-	diag_amode31_ops.diag0c(((void **)data)[smp_processor_id()]);
+	diag0c(((void **)data)[smp_processor_id()]);
 }
 
 /*
@@ -108,7 +108,7 @@ static struct hypfs_dbfs_file dbfs_file_0c = {
  */
 int __init hypfs_diag0c_init(void)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 0;
 	hypfs_dbfs_create_file(&dbfs_file_0c);
 	return 0;
@@ -119,7 +119,7 @@ int __init hypfs_diag0c_init(void)
  */
 void hypfs_diag0c_exit(void)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return;
 	hypfs_dbfs_remove_file(&dbfs_file_0c);
 }
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
new file mode 100644
index 000000000000..ede951dc0085
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag_fs.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/machine.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include "hypfs_diag.h"
+#include "hypfs.h"
+
+#define TMP_SIZE 64		/* size of temporary buffers */
+
+static char *diag224_cpu_names;			/* diag 224 name table */
+static int diag224_idx2name(int index, char *name);
+
+/*
+ * DIAG 204 member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_info_blk_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_info_blk_hdr);
+}
+
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->npar;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
+}
+
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_info_blk_hdr *)hdr)->flags;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
+}
+
+/* Partition header */
+
+static inline int part_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_part_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_part_hdr);
+}
+
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_part_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_part_hdr *)hdr)->rcpus;
+}
+
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+				       char *name)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	else /* DIAG204_INFO_EXT */
+		memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+		       DIAG204_LPAR_NAME_LEN);
+	EBCASC(name, DIAG204_LPAR_NAME_LEN);
+	name[DIAG204_LPAR_NAME_LEN] = 0;
+	strim(name);
+}
+
+/* CPU info block */
+
+static inline int cpu_info__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_cpu_info);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_cpu_info);
+}
+
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->ctidx;
+}
+
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
+}
+
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->acc_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->acc_time;
+}
+
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_cpu_info *)hdr)->lp_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->lp_time;
+}
+
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return 0;	/* online_time not available in simple info */
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header */
+
+static inline int phys_hdr__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_hdr);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_hdr);
+}
+
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_hdr *)hdr)->cpus;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+static inline int phys_cpu__size(enum diag204_format type)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return sizeof(struct diag204_phys_cpu);
+	else /* DIAG204_INFO_EXT */
+		return sizeof(struct diag204_x_phys_cpu);
+}
+
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
+}
+
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
+}
+
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == DIAG204_INFO_SIMPLE)
+		return ((struct diag204_phys_cpu *)hdr)->ctidx;
+	else /* DIAG204_INFO_EXT */
+		return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(),
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
+			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	rc = hypfs_create_u64(cpu_dir, "cputime",
+			      cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	if (diag204_get_info_type() == DIAG204_INFO_EXT) {
+		rc = hypfs_create_u64(cpu_dir, "onlinetime",
+				      cpu_info__online_time(diag204_get_info_type(),
+							    cpu_info));
+		if (IS_ERR(rc))
+			return PTR_ERR(rc);
+	}
+	diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+	struct dentry *cpus_dir;
+	struct dentry *lpar_dir;
+	char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
+	void *cpu_info;
+	int i;
+
+	part_hdr__part_name(diag204_get_info_type(), part_hdr, lpar_name);
+	lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
+	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+	if (IS_ERR(lpar_dir))
+		return lpar_dir;
+	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = part_hdr + part_hdr__size(diag204_get_info_type());
+	for (i = 0; i < part_hdr__rcpus(diag204_get_info_type(), part_hdr); i++) {
+		int rc;
+
+		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += cpu_info__size(diag204_get_info_type());
+	}
+	return cpu_info;
+}
+
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(),
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      phys_cpu__mgm_time(diag204_get_info_type(), cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+	int i;
+	void *cpu_info;
+	struct dentry *cpus_dir;
+
+	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = phys_hdr + phys_hdr__size(diag204_get_info_type());
+	for (i = 0; i < phys_hdr__cpus(diag204_get_info_type(), phys_hdr); i++) {
+		int rc;
+
+		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += phys_cpu__size(diag204_get_info_type());
+	}
+	return cpu_info;
+}
+
+int hypfs_diag_create_files(struct dentry *root)
+{
+	struct dentry *systems_dir, *hyp_dir;
+	void *time_hdr, *part_hdr;
+	void *buffer, *ptr;
+	int i, rc, pages;
+
+	buffer = diag204_get_buffer(diag204_get_info_type(), &pages);
+	if (IS_ERR(buffer))
+		return PTR_ERR(buffer);
+	rc = diag204_store(buffer, pages);
+	if (rc)
+		return rc;
+
+	systems_dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(systems_dir)) {
+		rc = PTR_ERR(systems_dir);
+		goto err_out;
+	}
+	time_hdr = (struct x_info_blk_hdr *)buffer;
+	part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type());
+	for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) {
+		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+		if (IS_ERR(part_hdr)) {
+			rc = PTR_ERR(part_hdr);
+			goto err_out;
+		}
+	}
+	if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) &
+	    DIAG204_LPAR_PHYS_FLG) {
+		ptr = hypfs_create_phys_files(root, part_hdr);
+		if (IS_ERR(ptr)) {
+			rc = PTR_ERR(ptr);
+			goto err_out;
+		}
+	}
+	hyp_dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(hyp_dir)) {
+		rc = PTR_ERR(hyp_dir);
+		goto err_out;
+	}
+	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+	if (IS_ERR(ptr)) {
+		rc = PTR_ERR(ptr);
+		goto err_out;
+	}
+	rc = 0;
+
+err_out:
+	return rc;
+}
+
+/* Diagnose 224 functions */
+
+static int diag224_idx2name(int index, char *name)
+{
+	memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+	       DIAG204_CPU_NAME_LEN);
+	name[DIAG204_CPU_NAME_LEN] = 0;
+	strim(name);
+	return 0;
+}
+
+static int diag224_get_name_table(void)
+{
+	/* memory must be below 2GB */
+	diag224_cpu_names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA);
+	if (!diag224_cpu_names)
+		return -ENOMEM;
+	if (diag224(diag224_cpu_names)) {
+		free_page((unsigned long)diag224_cpu_names);
+		return -EOPNOTSUPP;
+	}
+	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+	return 0;
+}
+
+static void diag224_delete_name_table(void)
+{
+	free_page((unsigned long)diag224_cpu_names);
+}
+
+int __init __hypfs_diag_fs_init(void)
+{
+	if (machine_is_lpar())
+		return diag224_get_name_table();
+	return 0;
+}
+
+void __hypfs_diag_fs_exit(void)
+{
+	diag224_delete_name_table();
+}
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index f5f7e78ddc0c..9fc3f0dae8f0 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -25,7 +25,7 @@
 
 static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
 {
-	union register_pair r1 = { .even = (unsigned long)data, };
+	union register_pair r1 = { .even = virt_to_phys(data), };
 
 	asm volatile("diag %[r1],%[r3],0x304\n"
 		     : [r1] "+&d" (r1.pair)
@@ -74,7 +74,7 @@ static int __hypfs_sprp_ioctl(void __user *user_area)
 	int rc;
 
 	rc = -ENOMEM;
-	data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	data = (void *)get_zeroed_page(GFP_KERNEL);
 	diag304 = kzalloc(sizeof(*diag304), GFP_KERNEL);
 	if (!data || !diag304)
 		goto out;
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index a3d881ca0a98..4db2895e4da3 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -11,50 +11,19 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <asm/extable.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/timex.h>
+#include "hypfs_vm.h"
 #include "hypfs.h"
 
-#define NAME_LEN 8
 #define DBFS_D2FC_HDR_VERSION 0
 
 static char local_guest[] = "        ";
 static char all_guests[] = "*       ";
 static char *all_groups = all_guests;
-static char *guest_query;
-
-struct diag2fc_data {
-	__u32 version;
-	__u32 flags;
-	__u64 used_cpu;
-	__u64 el_time;
-	__u64 mem_min_kb;
-	__u64 mem_max_kb;
-	__u64 mem_share_kb;
-	__u64 mem_used_kb;
-	__u32 pcpus;
-	__u32 lcpus;
-	__u32 vcpus;
-	__u32 ocpus;
-	__u32 cpu_max;
-	__u32 cpu_shares;
-	__u32 cpu_use_samp;
-	__u32 cpu_delay_samp;
-	__u32 page_wait_samp;
-	__u32 idle_samp;
-	__u32 other_samp;
-	__u32 total_samp;
-	char  guest_name[NAME_LEN];
-};
-
-struct diag2fc_parm_list {
-	char userid[NAME_LEN];
-	char aci_grp[NAME_LEN];
-	__u64 addr;
-	__u32 size;
-	__u32 fmt;
-};
+char *diag2fc_guest_query;
 
 static int diag2fc(int size, char* query, void *addr)
 {
@@ -62,10 +31,10 @@ static int diag2fc(int size, char* query, void *addr)
 	unsigned long rc;
 	struct diag2fc_parm_list parm_list;
 
-	memcpy(parm_list.userid, query, NAME_LEN);
-	ASCEBC(parm_list.userid, NAME_LEN);
-	memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
-	ASCEBC(parm_list.aci_grp, NAME_LEN);
+	memcpy(parm_list.userid, query, DIAG2FC_NAME_LEN);
+	ASCEBC(parm_list.userid, DIAG2FC_NAME_LEN);
+	memcpy(parm_list.aci_grp, all_groups, DIAG2FC_NAME_LEN);
+	ASCEBC(parm_list.aci_grp, DIAG2FC_NAME_LEN);
 	parm_list.addr = (unsigned long)addr;
 	parm_list.size = size;
 	parm_list.fmt = 0x02;
@@ -87,7 +56,7 @@ static int diag2fc(int size, char* query, void *addr)
 /*
  * Allocate buffer for "query" and store diag 2fc at "offset"
  */
-static void *diag2fc_store(char *query, unsigned int *count, int offset)
+void *diag2fc_store(char *query, unsigned int *count, int offset)
 {
 	void *data;
 	int size;
@@ -108,132 +77,11 @@ static void *diag2fc_store(char *query, unsigned int *count, int offset)
 	return data;
 }
 
-static void diag2fc_free(const void *data)
+void diag2fc_free(const void *data)
 {
 	vfree(data);
 }
 
-#define ATTRIBUTE(dir, name, member) \
-do { \
-	void *rc; \
-	rc = hypfs_create_u64(dir, name, member); \
-	if (IS_ERR(rc)) \
-		return PTR_ERR(rc); \
-} while(0)
-
-static int hypfs_vm_create_guest(struct dentry *systems_dir,
-				 struct diag2fc_data *data)
-{
-	char guest_name[NAME_LEN + 1] = {};
-	struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
-	int dedicated_flag, capped_value;
-
-	capped_value = (data->flags & 0x00000006) >> 1;
-	dedicated_flag = (data->flags & 0x00000008) >> 3;
-
-	/* guest dir */
-	memcpy(guest_name, data->guest_name, NAME_LEN);
-	EBCASC(guest_name, NAME_LEN);
-	strim(guest_name);
-	guest_dir = hypfs_mkdir(systems_dir, guest_name);
-	if (IS_ERR(guest_dir))
-		return PTR_ERR(guest_dir);
-	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
-
-	/* logical cpu information */
-	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
-	if (IS_ERR(cpus_dir))
-		return PTR_ERR(cpus_dir);
-	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
-	ATTRIBUTE(cpus_dir, "capped", capped_value);
-	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
-	ATTRIBUTE(cpus_dir, "count", data->vcpus);
-	/*
-	 * Note: The "weight_min" attribute got the wrong name.
-	 * The value represents the number of non-stopped (operating)
-	 * CPUS.
-	 */
-	ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
-	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
-	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
-
-	/* memory information */
-	mem_dir = hypfs_mkdir(guest_dir, "mem");
-	if (IS_ERR(mem_dir))
-		return PTR_ERR(mem_dir);
-	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
-	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
-	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
-	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
-
-	/* samples */
-	samples_dir = hypfs_mkdir(guest_dir, "samples");
-	if (IS_ERR(samples_dir))
-		return PTR_ERR(samples_dir);
-	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
-	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
-	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
-	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
-	ATTRIBUTE(samples_dir, "other", data->other_samp);
-	ATTRIBUTE(samples_dir, "total", data->total_samp);
-	return 0;
-}
-
-int hypfs_vm_create_files(struct dentry *root)
-{
-	struct dentry *dir, *file;
-	struct diag2fc_data *data;
-	unsigned int count = 0;
-	int rc, i;
-
-	data = diag2fc_store(guest_query, &count, 0);
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	/* Hypervisor Info */
-	dir = hypfs_mkdir(root, "hyp");
-	if (IS_ERR(dir)) {
-		rc = PTR_ERR(dir);
-		goto failed;
-	}
-	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
-	if (IS_ERR(file)) {
-		rc = PTR_ERR(file);
-		goto failed;
-	}
-
-	/* physical cpus */
-	dir = hypfs_mkdir(root, "cpus");
-	if (IS_ERR(dir)) {
-		rc = PTR_ERR(dir);
-		goto failed;
-	}
-	file = hypfs_create_u64(dir, "count", data->lcpus);
-	if (IS_ERR(file)) {
-		rc = PTR_ERR(file);
-		goto failed;
-	}
-
-	/* guests */
-	dir = hypfs_mkdir(root, "systems");
-	if (IS_ERR(dir)) {
-		rc = PTR_ERR(dir);
-		goto failed;
-	}
-
-	for (i = 0; i < count; i++) {
-		rc = hypfs_vm_create_guest(dir, &(data[i]));
-		if (rc)
-			goto failed;
-	}
-	diag2fc_free(data);
-	return 0;
-
-failed:
-	diag2fc_free(data);
-	return rc;
-}
-
 struct dbfs_d2fc_hdr {
 	u64	len;		/* Length of d2fc buffer without header */
 	u16	version;	/* Version of header */
@@ -252,7 +100,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
 	struct dbfs_d2fc *d2fc;
 	unsigned int count;
 
-	d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
+	d2fc = diag2fc_store(diag2fc_guest_query, &count, sizeof(d2fc->hdr));
 	if (IS_ERR(d2fc))
 		return PTR_ERR(d2fc);
 	store_tod_clock_ext(&d2fc->hdr.tod_ext);
@@ -274,12 +122,12 @@ static struct hypfs_dbfs_file dbfs_file_2fc = {
 
 int hypfs_vm_init(void)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 0;
 	if (diag2fc(0, all_guests, NULL) > 0)
-		guest_query = all_guests;
+		diag2fc_guest_query = all_guests;
 	else if (diag2fc(0, local_guest, NULL) > 0)
-		guest_query = local_guest;
+		diag2fc_guest_query = local_guest;
 	else
 		return -EACCES;
 	hypfs_dbfs_create_file(&dbfs_file_2fc);
@@ -288,7 +136,7 @@ int hypfs_vm_init(void)
 
 void hypfs_vm_exit(void)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return;
 	hypfs_dbfs_remove_file(&dbfs_file_2fc);
 }
diff --git a/arch/s390/hypfs/hypfs_vm.h b/arch/s390/hypfs/hypfs_vm.h
new file mode 100644
index 000000000000..fe2e5851addd
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_VM_H_
+#define _S390_HYPFS_VM_H_
+
+#define DIAG2FC_NAME_LEN 8
+
+struct diag2fc_data {
+	__u32 version;
+	__u32 flags;
+	__u64 used_cpu;
+	__u64 el_time;
+	__u64 mem_min_kb;
+	__u64 mem_max_kb;
+	__u64 mem_share_kb;
+	__u64 mem_used_kb;
+	__u32 pcpus;
+	__u32 lcpus;
+	__u32 vcpus;
+	__u32 ocpus;
+	__u32 cpu_max;
+	__u32 cpu_shares;
+	__u32 cpu_use_samp;
+	__u32 cpu_delay_samp;
+	__u32 page_wait_samp;
+	__u32 idle_samp;
+	__u32 other_samp;
+	__u32 total_samp;
+	char  guest_name[DIAG2FC_NAME_LEN];
+};
+
+struct diag2fc_parm_list {
+	char userid[DIAG2FC_NAME_LEN];
+	char aci_grp[DIAG2FC_NAME_LEN];
+	__u64 addr;
+	__u32 size;
+	__u32 fmt;
+};
+
+void *diag2fc_store(char *query, unsigned int *count, int offset);
+void diag2fc_free(const void *data);
+extern char *diag2fc_guest_query;
+
+#endif /* _S390_HYPFS_VM_H_ */
diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c
new file mode 100644
index 000000000000..6011289afa8c
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm_fs.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/extable.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs_vm.h"
+#include "hypfs.h"
+
+#define ATTRIBUTE(dir, name, member) \
+do { \
+	void *rc; \
+	rc = hypfs_create_u64(dir, name, member); \
+	if (IS_ERR(rc)) \
+		return PTR_ERR(rc); \
+} while (0)
+
+static int hypfs_vm_create_guest(struct dentry *systems_dir,
+				 struct diag2fc_data *data)
+{
+	char guest_name[DIAG2FC_NAME_LEN + 1] = {};
+	struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
+	int dedicated_flag, capped_value;
+
+	capped_value = (data->flags & 0x00000006) >> 1;
+	dedicated_flag = (data->flags & 0x00000008) >> 3;
+
+	/* guest dir */
+	memcpy(guest_name, data->guest_name, DIAG2FC_NAME_LEN);
+	EBCASC(guest_name, DIAG2FC_NAME_LEN);
+	strim(guest_name);
+	guest_dir = hypfs_mkdir(systems_dir, guest_name);
+	if (IS_ERR(guest_dir))
+		return PTR_ERR(guest_dir);
+	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
+
+	/* logical cpu information */
+	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return PTR_ERR(cpus_dir);
+	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+	ATTRIBUTE(cpus_dir, "capped", capped_value);
+	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+	ATTRIBUTE(cpus_dir, "count", data->vcpus);
+	/*
+	 * Note: The "weight_min" attribute got the wrong name.
+	 * The value represents the number of non-stopped (operating)
+	 * CPUS.
+	 */
+	ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
+
+	/* memory information */
+	mem_dir = hypfs_mkdir(guest_dir, "mem");
+	if (IS_ERR(mem_dir))
+		return PTR_ERR(mem_dir);
+	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
+
+	/* samples */
+	samples_dir = hypfs_mkdir(guest_dir, "samples");
+	if (IS_ERR(samples_dir))
+		return PTR_ERR(samples_dir);
+	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+	ATTRIBUTE(samples_dir, "other", data->other_samp);
+	ATTRIBUTE(samples_dir, "total", data->total_samp);
+	return 0;
+}
+
+int hypfs_vm_create_files(struct dentry *root)
+{
+	struct dentry *dir, *file;
+	struct diag2fc_data *data;
+	unsigned int count = 0;
+	int rc, i;
+
+	data = diag2fc_store(diag2fc_guest_query, &count, 0);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	/* Hypervisor Info */
+	dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto failed;
+	}
+
+	/* physical cpus */
+	dir = hypfs_mkdir(root, "cpus");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+	file = hypfs_create_u64(dir, "count", data->lcpus);
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto failed;
+	}
+
+	/* guests */
+	dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(dir)) {
+		rc = PTR_ERR(dir);
+		goto failed;
+	}
+
+	for (i = 0; i < count; i++) {
+		rc = hypfs_vm_create_guest(dir, &data[i]);
+		if (rc)
+			goto failed;
+	}
+	diag2fc_free(data);
+	return 0;
+
+failed:
+	diag2fc_free(data);
+	return rc;
+}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 5c97f48cea91..96409573c75d 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/kobject.h>
 #include <linux/seq_file.h>
 #include <linux/uio.h>
+#include <asm/machine.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
@@ -53,7 +54,7 @@ static void hypfs_update_update(struct super_block *sb)
 	struct inode *inode = d_inode(sb_info->update_file);
 
 	sb_info->last_update = ktime_get_seconds();
-	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	simple_inode_init_ts(inode);
 }
 
 /* directory tree removal functions */
@@ -101,7 +102,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
 		ret->i_mode = mode;
 		ret->i_uid = hypfs_info->uid;
 		ret->i_gid = hypfs_info->gid;
-		ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+		simple_inode_init_ts(ret);
 		if (S_ISDIR(mode))
 			set_nlink(ret, 2);
 	}
@@ -184,7 +185,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		goto out;
 	}
 	hypfs_delete_tree(sb->s_root);
-	if (MACHINE_IS_VM)
+	if (machine_is_vm())
 		rc = hypfs_vm_create_files(sb->s_root);
 	else
 		rc = hypfs_diag_create_files(sb->s_root);
@@ -273,7 +274,7 @@ static int hypfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_root = root_dentry = d_make_root(root_inode);
 	if (!root_dentry)
 		return -ENOMEM;
-	if (MACHINE_IS_VM)
+	if (machine_is_vm())
 		rc = hypfs_vm_create_files(root_dentry);
 	else
 		rc = hypfs_diag_create_files(root_dentry);
@@ -341,7 +342,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
 	struct inode *inode;
 
 	inode_lock(d_inode(parent));
-	dentry = lookup_one_len(name, parent, strlen(name));
+	dentry = lookup_noperm(&QSTR(name), parent);
 	if (IS_ERR(dentry)) {
 		dentry = ERR_PTR(-ENOMEM);
 		goto fail;
@@ -443,7 +444,6 @@ static const struct file_operations hypfs_file_ops = {
 	.release	= hypfs_release,
 	.read_iter	= hypfs_read_iter,
 	.write_iter	= hypfs_write_iter,
-	.llseek		= no_llseek,
 };
 
 static struct file_system_type hypfs_type = {
@@ -460,45 +460,18 @@ static const struct super_operations hypfs_s_ops = {
 	.show_options	= hypfs_show_options,
 };
 
-static int __init hypfs_init(void)
+int __init __hypfs_fs_init(void)
 {
 	int rc;
 
-	hypfs_dbfs_init();
-
-	if (hypfs_diag_init()) {
-		rc = -ENODATA;
-		goto fail_dbfs_exit;
-	}
-	if (hypfs_vm_init()) {
-		rc = -ENODATA;
-		goto fail_hypfs_diag_exit;
-	}
-	hypfs_sprp_init();
-	if (hypfs_diag0c_init()) {
-		rc = -ENODATA;
-		goto fail_hypfs_sprp_exit;
-	}
 	rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
 	if (rc)
-		goto fail_hypfs_diag0c_exit;
+		return rc;
 	rc = register_filesystem(&hypfs_type);
 	if (rc)
-		goto fail_filesystem;
+		goto fail;
 	return 0;
-
-fail_filesystem:
+fail:
 	sysfs_remove_mount_point(hypervisor_kobj, "s390");
-fail_hypfs_diag0c_exit:
-	hypfs_diag0c_exit();
-fail_hypfs_sprp_exit:
-	hypfs_sprp_exit();
-	hypfs_vm_exit();
-fail_hypfs_diag_exit:
-	hypfs_diag_exit();
-fail_dbfs_exit:
-	hypfs_dbfs_exit();
-	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
 	return rc;
 }
-device_initcall(hypfs_init)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 1a18d7b82f86..297bf7157968 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -5,6 +5,6 @@ generated-y += syscall_table.h
 generated-y += unistd_nr.h
 
 generic-y += asm-offsets.h
-generic-y += export.h
 generic-y += kvm_types.h
 generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
new file mode 100644
index 000000000000..317c07c09ae4
--- /dev/null
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ABS_LOWCORE_H
+#define _ASM_S390_ABS_LOWCORE_H
+
+#include <linux/smp.h>
+#include <asm/lowcore.h>
+
+#define ABS_LOWCORE_MAP_SIZE	(NR_CPUS * sizeof(struct lowcore))
+
+extern unsigned long __abs_lowcore;
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
+void abs_lowcore_unmap(int cpu);
+
+static inline struct lowcore *get_abs_lowcore(void)
+{
+	int cpu;
+
+	cpu = get_cpu();
+	return ((struct lowcore *)__abs_lowcore) + cpu;
+}
+
+static inline void put_abs_lowcore(struct lowcore *lc)
+{
+	put_cpu();
+}
+
+#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/access-regs.h b/arch/s390/include/asm/access-regs.h
new file mode 100644
index 000000000000..1a6412d9f5ad
--- /dev/null
+++ b/arch/s390/include/asm/access-regs.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2024
+ */
+
+#ifndef __ASM_S390_ACCESS_REGS_H
+#define __ASM_S390_ACCESS_REGS_H
+
+#include <linux/instrumented.h>
+#include <asm/sigcontext.h>
+
+struct access_regs {
+	unsigned int regs[NUM_ACRS];
+};
+
+static inline void save_access_regs(unsigned int *acrs)
+{
+	struct access_regs *regs = (struct access_regs *)acrs;
+
+	instrument_write(regs, sizeof(*regs));
+	asm volatile("stamy	0,15,%[regs]"
+		     : [regs] "=QS" (*regs)
+		     :
+		     : "memory");
+}
+
+static inline void restore_access_regs(unsigned int *acrs)
+{
+	struct access_regs *regs = (struct access_regs *)acrs;
+
+	instrument_read(regs, sizeof(*regs));
+	asm volatile("lamy	0,15,%[regs]"
+		     :
+		     : [regs] "QS" (*regs)
+		     : "memory");
+}
+
+#endif /* __ASM_S390_ACCESS_REGS_H */
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 01936fdfaddb..c4c28c2609a5 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -12,12 +12,12 @@
 
 #include <linux/bit_spinlock.h>
 #include <linux/dma-mapping.h>
+#include <asm/tpi.h>
 
 struct airq_struct {
 	struct hlist_node list;		/* Handler queueing. */
-	void (*handler)(struct airq_struct *airq, bool floating);
+	void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
 	u8 *lsi_ptr;			/* Local-Summary-Indicator pointer */
-	u8 lsi_mask;			/* Local-Summary-Indicator mask */
 	u8 isc;				/* Interrupt-subclass */
 	u8 flags;
 };
@@ -46,8 +46,10 @@ struct airq_iv {
 #define AIRQ_IV_PTR		4	/* Allocate the ptr array */
 #define AIRQ_IV_DATA		8	/* Allocate the data array */
 #define AIRQ_IV_CACHELINE	16	/* Cacheline alignment for the vector */
+#define AIRQ_IV_GUESTVEC	32	/* Vector is a pinned guest page */
 
-struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
+			       unsigned long *vec);
 void airq_iv_release(struct airq_iv *iv);
 unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
 void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
deleted file mode 100644
index 7db046596b93..000000000000
--- a/arch/s390/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_ALTERNATIVE_ASM_H
-#define _ASM_S390_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
-	.long	\orig_start - .
-	.long	\alt_start - .
-	.word	\feature
-	.byte	\orig_end - \orig_start
-	.org	. - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
-	.org	. - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-	.pushsection .altinstr_replacement,"ax"
-770:	\newinstr
-771:	.popsection
-772:	\oldinstr
-773:	.pushsection .altinstructions,"a"
-	alt_entry 772b, 773b, 770b, 771b, \feature
-	.popsection
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-	.pushsection .altinstr_replacement,"ax"
-770:	\newinstr1
-771:	\newinstr2
-772:	.popsection
-773:	\oldinstr
-774:	.pushsection .altinstructions,"a"
-	alt_entry 773b, 774b, 770b, 771b,\feature1
-	alt_entry 773b, 774b, 771b, 772b,\feature2
-	.popsection
-.endm
-
-#endif	/*  __ASSEMBLY__  */
-
-#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index 904dd049f954..c7bf60a541e9 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -2,6 +2,55 @@
 #ifndef _ASM_S390_ALTERNATIVE_H
 #define _ASM_S390_ALTERNATIVE_H
 
+/*
+ * Each alternative comes with a 32 bit feature field:
+ *	union {
+ *		u32 feature;
+ *		struct {
+ *			u32 ctx	 : 4;
+ *			u32 type : 8;
+ *			u32 data : 20;
+ *		};
+ *	}
+ *
+ * @ctx is a bitfield, where only one bit must be set. Each bit defines
+ * in which context an alternative is supposed to be applied to the
+ * kernel image:
+ *
+ * - from the decompressor before the kernel itself is executed
+ * - from early kernel code from within the kernel
+ *
+ * @type is a number which defines the type and with that the type
+ * specific alternative patching.
+ *
+ * @data is additional type specific information which defines if an
+ * alternative should be applied.
+ */
+
+#define ALT_CTX_EARLY		1
+#define ALT_CTX_LATE		2
+#define ALT_CTX_ALL		(ALT_CTX_EARLY | ALT_CTX_LATE)
+
+#define ALT_TYPE_FACILITY	0
+#define ALT_TYPE_FEATURE	1
+#define ALT_TYPE_SPEC		2
+
+#define ALT_DATA_SHIFT		0
+#define ALT_TYPE_SHIFT		20
+#define ALT_CTX_SHIFT		28
+
+#define ALT_FACILITY(facility)		(ALT_CTX_EARLY << ALT_CTX_SHIFT		| \
+					 ALT_TYPE_FACILITY << ALT_TYPE_SHIFT	| \
+					 (facility) << ALT_DATA_SHIFT)
+
+#define ALT_FEATURE(feature)		(ALT_CTX_EARLY << ALT_CTX_SHIFT		| \
+					 ALT_TYPE_FEATURE << ALT_TYPE_SHIFT	| \
+					 (feature) << ALT_DATA_SHIFT)
+
+#define ALT_SPEC(facility)		(ALT_CTX_LATE << ALT_CTX_SHIFT		| \
+					 ALT_TYPE_SPEC << ALT_TYPE_SHIFT	| \
+					 (facility) << ALT_DATA_SHIFT)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
@@ -11,12 +60,30 @@
 struct alt_instr {
 	s32 instr_offset;	/* original instruction */
 	s32 repl_offset;	/* offset to replacement instruction */
-	u16 facility;		/* facility bit set for replacement */
+	union {
+		u32 feature;	/* feature required for replacement */
+		struct {
+			u32 ctx	 : 4;  /* context */
+			u32 type : 8;  /* type of alternative */
+			u32 data : 20; /* patching information */
+		};
+	};
 	u8  instrlen;		/* length of original instruction */
 } __packed;
 
-void apply_alternative_instructions(void);
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx);
+
+static inline void apply_alternative_instructions(void)
+{
+	__apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE);
+}
+
+static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+	__apply_alternatives(start, end, ALT_CTX_ALL);
+}
 
 /*
  * +---------------------------------+
@@ -48,11 +115,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 #define OLDINSTR(oldinstr) \
 	"661:\n\t" oldinstr "\n662:\n"
 
-#define ALTINSTR_ENTRY(facility, num)					\
+#define ALTINSTR_ENTRY(feature, num)					\
 	"\t.long 661b - .\n"			/* old instruction */	\
 	"\t.long " b_altinstr(num)"b - .\n"	/* alt instruction */	\
-	"\t.word " __stringify(facility) "\n"	/* facility bit    */	\
+	"\t.long " __stringify(feature) "\n"	/* feature	   */	\
 	"\t.byte " oldinstr_len "\n"		/* instruction len */	\
+	"\t.org . - (" oldinstr_len ") & 1\n"				\
 	"\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n"	\
 	"\t.org . - (" altinstr_len(num) ") + (" oldinstr_len ")\n"
 
@@ -60,24 +128,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 	b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
 
 /* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, altinstr, facility) \
+#define ALTERNATIVE(oldinstr, altinstr, feature) \
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(altinstr, 1)				\
 	".popsection\n"							\
 	OLDINSTR(oldinstr)						\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(facility, 1)					\
+	ALTINSTR_ENTRY(feature, 1)					\
 	".popsection\n"
 
-#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
+#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(altinstr1, 1)				\
 	ALTINSTR_REPLACEMENT(altinstr2, 2)				\
 	".popsection\n"							\
 	OLDINSTR(oldinstr)						\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(facility1, 1)					\
-	ALTINSTR_ENTRY(facility2, 2)					\
+	ALTINSTR_ENTRY(feature1, 1)					\
+	ALTINSTR_ENTRY(feature2, 2)					\
 	".popsection\n"
 
 /*
@@ -92,12 +160,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
  * For non barrier like inlines please define new variants
  * without volatile and memory clobber.
  */
-#define alternative(oldinstr, altinstr, facility)			\
-	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+#define alternative(oldinstr, altinstr, feature)			\
+	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory")
 
-#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
-	asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1,   \
-				   altinstr2, facility2) ::: "memory")
+#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \
+	asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1,   \
+				   altinstr2, feature2) ::: "memory")
 
 /* Alternative inline assembly with input. */
 #define alternative_input(oldinstr, newinstr, feature, input...)	\
@@ -105,8 +173,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 		: : input)
 
 /* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, altinstr, facility, output, input...)	\
-	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility)	\
+#define alternative_io(oldinstr, altinstr, feature, output, input...)	\
+	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature)	\
 		: output : input)
 
 /* Use this macro if more than one output parameter is needed. */
@@ -115,6 +183,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 /* Use this macro if clobbers are needed without inputs. */
 #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
 
+#else  /* __ASSEMBLY__ */
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+	.long	\orig_start - .
+	.long	\alt_start - .
+	.long	\feature
+	.byte	\orig_end - \orig_start
+	.org	. - ( \orig_end - \orig_start ) & 1
+	.org	. - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+	.org	. - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr
+771:	.popsection
+772:	\oldinstr
+773:	.pushsection .altinstructions,"a"
+	alt_entry 772b, 773b, 770b, 771b, \feature
+	.popsection
+.endm
+
+/*
+ * Define an alternative with two replacement candidates. Early code in
+ * apply_alternatives() replaces @oldinstr with @newinstr1 if @feature1
+ * is present, and with @newinstr2 if @feature2 is present.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr1
+771:	\newinstr2
+772:	.popsection
+773:	\oldinstr
+774:	.pushsection .altinstructions,"a"
+	alt_entry 773b, 774b, 770b, 771b,\feature1
+	alt_entry 773b, 774b, 771b, 772b,\feature2
+	.popsection
+.endm
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_S390_ALTERNATIVE_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index b515cfa62bd9..395b02d6a133 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -43,10 +43,24 @@ struct ap_queue_status {
 	unsigned int queue_empty	: 1;
 	unsigned int replies_waiting	: 1;
 	unsigned int queue_full		: 1;
-	unsigned int _pad1		: 4;
+	unsigned int			: 3;
+	unsigned int async		: 1;
 	unsigned int irq_enabled	: 1;
 	unsigned int response_code	: 8;
-	unsigned int _pad2		: 16;
+	unsigned int			: 16;
+};
+
+/*
+ * AP queue status reg union to access the reg1
+ * register with the lower 32 bits comprising the
+ * ap queue status.
+ */
+union ap_queue_status_reg {
+	unsigned long value;
+	struct {
+		u32 _pad;
+		struct ap_queue_status status;
+	};
 };
 
 /**
@@ -73,16 +87,55 @@ static inline bool ap_instructions_available(void)
 	return reg1 != 0;
 }
 
+/* TAPQ register GR2 response struct */
+struct ap_tapq_hwinfo {
+	union {
+		unsigned long value;
+		struct {
+			unsigned int fac    : 32; /* facility bits */
+			unsigned int apinfo : 32; /* ap type, ... */
+		};
+		struct {
+			unsigned int apsc  :  1; /* APSC */
+			unsigned int mex4k :  1; /* AP4KM */
+			unsigned int crt4k :  1; /* AP4KC */
+			unsigned int cca   :  1; /* D */
+			unsigned int accel :  1; /* A */
+			unsigned int ep11  :  1; /* X */
+			unsigned int apxa  :  1; /* APXA */
+			unsigned int	   :  1;
+			unsigned int class :  8;
+			unsigned int bs	   :  2; /* SE bind/assoc */
+			unsigned int	   : 14;
+			unsigned int at	   :  8; /* ap type */
+			unsigned int nd	   :  8; /* nr of domains */
+			unsigned int	   :  4;
+			unsigned int ml	   :  4; /* apxl ml */
+			unsigned int	   :  4;
+			unsigned int qd	   :  4; /* queue depth */
+		};
+	};
+};
+
+/*
+ * Convenience defines to be used with the bs field from struct ap_tapq_hwinfo
+ */
+#define AP_BS_Q_USABLE		      0
+#define AP_BS_Q_USABLE_NO_SECURE_KEY  1
+#define AP_BS_Q_AVAIL_FOR_BINDING     2
+#define AP_BS_Q_UNUSABLE	      3
+
 /**
  * ap_tapq(): Test adjunct processor queue.
  * @qid: The AP queue number
- * @info: Pointer to queue descriptor
+ * @info: Pointer to tapq hwinfo struct
  *
  * Returns AP queue status structure.
  */
-static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
+static inline struct ap_queue_status ap_tapq(ap_qid_t qid,
+					     struct ap_tapq_hwinfo *info)
 {
-	struct ap_queue_status reg1;
+	union ap_queue_status_reg reg1;
 	unsigned long reg2;
 
 	asm volatile(
@@ -91,25 +144,24 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
 		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(TAPQ) */
 		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
 		"	lgr	%[reg2],2\n"		/* gr2 into reg2 */
-		: [reg1] "=&d" (reg1), [reg2] "=&d" (reg2)
+		: [reg1] "=&d" (reg1.value), [reg2] "=&d" (reg2)
 		: [qid] "d" (qid)
 		: "cc", "0", "1", "2");
 	if (info)
-		*info = reg2;
-	return reg1;
+		info->value = reg2;
+	return reg1.status;
 }
 
 /**
  * ap_test_queue(): Test adjunct processor queue.
  * @qid: The AP queue number
  * @tbit: Test facilities bit
- * @info: Pointer to queue descriptor
+ * @info: Pointer to tapq hwinfo struct
  *
  * Returns AP queue status structure.
  */
-static inline struct ap_queue_status ap_test_queue(ap_qid_t qid,
-						   int tbit,
-						   unsigned long *info)
+static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, int tbit,
+						   struct ap_tapq_hwinfo *info)
 {
 	if (tbit)
 		qid |= 1UL << 23; /* set T bit*/
@@ -119,43 +171,51 @@ static inline struct ap_queue_status ap_test_queue(ap_qid_t qid,
 /**
  * ap_pqap_rapq(): Reset adjunct processor queue.
  * @qid: The AP queue number
+ * @fbit: if != 0 set F bit
  *
  * Returns AP queue status structure.
  */
-static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
+static inline struct ap_queue_status ap_rapq(ap_qid_t qid, int fbit)
 {
 	unsigned long reg0 = qid | (1UL << 24);  /* fc 1UL is RAPQ */
-	struct ap_queue_status reg1;
+	union ap_queue_status_reg reg1;
+
+	if (fbit)
+		reg0 |= 1UL << 22;
 
 	asm volatile(
 		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
 		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(RAPQ) */
 		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
-		: [reg1] "=&d" (reg1)
+		: [reg1] "=&d" (reg1.value)
 		: [reg0] "d" (reg0)
 		: "cc", "0", "1");
-	return reg1;
+	return reg1.status;
 }
 
 /**
  * ap_pqap_zapq(): Reset and zeroize adjunct processor queue.
  * @qid: The AP queue number
+ * @fbit: if != 0 set F bit
  *
  * Returns AP queue status structure.
  */
-static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
+static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit)
 {
 	unsigned long reg0 = qid | (2UL << 24);  /* fc 2UL is ZAPQ */
-	struct ap_queue_status reg1;
+	union ap_queue_status_reg reg1;
+
+	if (fbit)
+		reg0 |= 1UL << 22;
 
 	asm volatile(
 		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
 		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(ZAPQ) */
 		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
-		: [reg1] "=&d" (reg1)
+		: [reg1] "=&d" (reg1.value)
 		: [reg0] "d" (reg0)
 		: "cc", "0", "1");
-	return reg1;
+	return reg1.status;
 }
 
 /**
@@ -163,19 +223,25 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
  * config info as returned by the ap_qci() function.
  */
 struct ap_config_info {
-	unsigned int apsc	 : 1;	/* S bit */
-	unsigned int apxa	 : 1;	/* N bit */
-	unsigned int qact	 : 1;	/* C bit */
-	unsigned int rc8a	 : 1;	/* R bit */
-	unsigned char _reserved1 : 4;
-	unsigned char _reserved2[3];
-	unsigned char Na;		/* max # of APs - 1 */
-	unsigned char Nd;		/* max # of Domains - 1 */
-	unsigned char _reserved3[10];
+	union {
+		unsigned int flags;
+		struct {
+			unsigned int apsc	 : 1;	/* S bit */
+			unsigned int apxa	 : 1;	/* N bit */
+			unsigned int qact	 : 1;	/* C bit */
+			unsigned int rc8a	 : 1;	/* R bit */
+			unsigned int		 : 4;
+			unsigned int apsb	 : 1;	/* B bit */
+			unsigned int		 : 23;
+		};
+	};
+	unsigned char na;		/* max # of APs - 1 */
+	unsigned char nd;		/* max # of Domains - 1 */
+	unsigned char _reserved0[10];
 	unsigned int apm[8];		/* AP ID mask */
 	unsigned int aqm[8];		/* AP (usage) queue mask */
 	unsigned int adm[8];		/* AP (control) domain mask */
-	unsigned char _reserved4[16];
+	unsigned char _reserved1[16];
 } __aligned(8);
 
 /**
@@ -209,41 +275,40 @@ static inline int ap_qci(struct ap_config_info *config)
  * parameter to the PQAP(AQIC) instruction. For details please
  * see the AR documentation.
  */
-struct ap_qirq_ctrl {
-	unsigned int _res1 : 8;
-	unsigned int zone  : 8;	/* zone info */
-	unsigned int ir    : 1;	/* ir flag: enable (1) or disable (0) irq */
-	unsigned int _res2 : 4;
-	unsigned int gisc  : 3;	/* guest isc field */
-	unsigned int _res3 : 6;
-	unsigned int gf    : 2;	/* gisa format */
-	unsigned int _res4 : 1;
-	unsigned int gisa  : 27;	/* gisa origin */
-	unsigned int _res5 : 1;
-	unsigned int isc   : 3;	/* irq sub class */
+union ap_qirq_ctrl {
+	unsigned long value;
+	struct {
+		unsigned int	   : 8;
+		unsigned int zone  : 8;	/* zone info */
+		unsigned int ir	   : 1;	/* ir flag: enable (1) or disable (0) irq */
+		unsigned int	   : 4;
+		unsigned int gisc  : 3;	/* guest isc field */
+		unsigned int	   : 6;
+		unsigned int gf	   : 2;	/* gisa format */
+		unsigned int	   : 1;
+		unsigned int gisa  : 27;	/* gisa origin */
+		unsigned int	   : 1;
+		unsigned int isc   : 3;	/* irq sub class */
+	};
 };
 
 /**
  * ap_aqic(): Control interruption for a specific AP.
  * @qid: The AP queue number
  * @qirqctrl: struct ap_qirq_ctrl (64 bit value)
- * @ind: The notification indicator byte
+ * @pa_ind: Physical address of the notification indicator byte
  *
  * Returns AP queue status.
  */
 static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
-					     struct ap_qirq_ctrl qirqctrl,
-					     void *ind)
+					     union ap_qirq_ctrl qirqctrl,
+					     phys_addr_t pa_ind)
 {
 	unsigned long reg0 = qid | (3UL << 24);  /* fc 3UL is AQIC */
-	union {
-		unsigned long value;
-		struct ap_qirq_ctrl qirqctrl;
-		struct ap_queue_status status;
-	} reg1;
-	unsigned long reg2 = virt_to_phys(ind);
+	union ap_queue_status_reg reg1;
+	unsigned long reg2 = pa_ind;
 
-	reg1.qirqctrl = qirqctrl;
+	reg1.value = qirqctrl.value;
 
 	asm volatile(
 		"	lgr	0,%[reg0]\n"		/* qid param into gr0 */
@@ -251,9 +316,9 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
 		"	lgr	2,%[reg2]\n"		/* ni addr into gr2 */
 		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(AQIC) */
 		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
-		: [reg1] "+&d" (reg1)
+		: [reg1] "+&d" (reg1.value)
 		: [reg0] "d" (reg0), [reg2] "d" (reg2)
-		: "cc", "0", "1", "2");
+		: "cc", "memory", "0", "1", "2");
 
 	return reg1.status;
 }
@@ -276,7 +341,7 @@ union ap_qact_ap_info {
 };
 
 /**
- * ap_qact(): Query AP combatibility type.
+ * ap_qact(): Query AP compatibility type.
  * @qid: The AP queue number
  * @apinfo: On input the info about the AP queue. On output the
  *	    alternate AP queue info provided by the qact function
@@ -288,10 +353,7 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
 					     union ap_qact_ap_info *apinfo)
 {
 	unsigned long reg0 = qid | (5UL << 24) | ((ifbit & 0x01) << 22);
-	union {
-		unsigned long value;
-		struct ap_queue_status status;
-	} reg1;
+	union ap_queue_status_reg reg1;
 	unsigned long reg2;
 
 	reg1.value = apinfo->val;
@@ -302,13 +364,66 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
 		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(QACT) */
 		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
 		"	lgr	%[reg2],2\n"		/* qact out info into reg2 */
-		: [reg1] "+&d" (reg1), [reg2] "=&d" (reg2)
+		: [reg1] "+&d" (reg1.value), [reg2] "=&d" (reg2)
 		: [reg0] "d" (reg0)
 		: "cc", "0", "1", "2");
 	apinfo->val = reg2;
 	return reg1.status;
 }
 
+/*
+ * ap_bapq(): SE bind AP queue.
+ * @qid: The AP queue number
+ *
+ * Returns AP queue status structure.
+ *
+ * Invoking this function in a non-SE environment
+ * may cause a specification exception.
+ */
+static inline struct ap_queue_status ap_bapq(ap_qid_t qid)
+{
+	unsigned long reg0 = qid | (7UL << 24);  /* fc 7 is BAPQ */
+	union ap_queue_status_reg reg1;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(BAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "=&d" (reg1.value)
+		: [reg0] "d" (reg0)
+		: "cc", "0", "1");
+
+	return reg1.status;
+}
+
+/*
+ * ap_aapq(): SE associate AP queue.
+ * @qid: The AP queue number
+ * @sec_idx: The secret index
+ *
+ * Returns AP queue status structure.
+ *
+ * Invoking this function in a non-SE environment
+ * may cause a specification exception.
+ */
+static inline struct ap_queue_status ap_aapq(ap_qid_t qid, unsigned int sec_idx)
+{
+	unsigned long reg0 = qid | (8UL << 24);  /* fc 8 is AAPQ */
+	unsigned long reg2 = sec_idx;
+	union ap_queue_status_reg reg1;
+
+	asm volatile(
+		"	lgr	0,%[reg0]\n"		/* qid arg into gr0 */
+		"	lgr	2,%[reg2]\n"		/* secret index into gr2 */
+		"	.insn	rre,0xb2af0000,0,0\n"	/* PQAP(AAPQ) */
+		"	lgr	%[reg1],1\n"		/* gr1 (status) into reg1 */
+		: [reg1] "=&d" (reg1.value)
+		: [reg0] "d" (reg0), [reg2] "d" (reg2)
+		: "cc", "0", "1", "2");
+
+	return reg1.status;
+}
+
 /**
  * ap_nqap(): Send message to adjunct processor queue.
  * @qid: The AP queue number
@@ -327,7 +442,7 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
 {
 	unsigned long reg0 = qid | 0x40000000UL;  /* 0x4... is last msg part */
 	union register_pair nqap_r1, nqap_r2;
-	struct ap_queue_status reg1;
+	union ap_queue_status_reg reg1;
 
 	nqap_r1.even = (unsigned int)(psmid >> 32);
 	nqap_r1.odd  = psmid & 0xffffffff;
@@ -339,21 +454,22 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
 		"0:	.insn	rre,0xb2ad0000,%[nqap_r1],%[nqap_r2]\n"
 		"	brc	2,0b\n"       /* handle partial completion */
 		"	lgr	%[reg1],1\n"  /* gr1 (status) into reg1 */
-		: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1),
+		: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
 		  [nqap_r2] "+&d" (nqap_r2.pair)
 		: [nqap_r1] "d" (nqap_r1.pair)
 		: "cc", "memory", "0", "1");
-	return reg1;
+	return reg1.status;
 }
 
 /**
  * ap_dqap(): Receive message from adjunct processor queue.
  * @qid: The AP queue number
  * @psmid: Pointer to program supplied message identifier
- * @msg: The message text
- * @length: The message length
- * @reslength: Resitual length on return
- * @resgr0: input: gr0 value (only used if != 0), output: resitual gr0 content
+ * @msg: Pointer to message buffer
+ * @msglen: Message buffer size
+ * @length: Pointer to length of actually written bytes
+ * @reslength: Residual length on return
+ * @resgr0: input: gr0 value (only used if != 0), output: residual gr0 content
  *
  * Returns AP queue status structure.
  * Condition code 1 on DQAP means the receive has taken place
@@ -377,20 +493,21 @@ static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
  * *resgr0 is to be used instead of qid to further process this entry.
  */
 static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
-					     unsigned long long *psmid,
-					     void *msg, size_t length,
+					     unsigned long *psmid,
+					     void *msg, size_t msglen,
+					     size_t *length,
 					     size_t *reslength,
 					     unsigned long *resgr0)
 {
 	unsigned long reg0 = resgr0 && *resgr0 ? *resgr0 : qid | 0x80000000UL;
-	struct ap_queue_status reg1;
+	union ap_queue_status_reg reg1;
 	unsigned long reg2;
 	union register_pair rp1, rp2;
 
 	rp1.even = 0UL;
 	rp1.odd  = 0UL;
 	rp2.even = (unsigned long)msg;
-	rp2.odd  = (unsigned long)length;
+	rp2.odd  = (unsigned long)msglen;
 
 	asm volatile(
 		"	lgr	0,%[reg0]\n"   /* qid param into gr0 */
@@ -402,8 +519,9 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
 		"2:	lgr	%[reg0],0\n"   /* gr0 (qid + info) into reg0 */
 		"	lgr	%[reg1],1\n"   /* gr1 (status) into reg1 */
 		"	lgr	%[reg2],2\n"   /* gr2 (res length) into reg2 */
-		: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1), [reg2] "=&d" (reg2),
-		  [rp1] "+&d" (rp1.pair), [rp2] "+&d" (rp2.pair)
+		: [reg0] "+&d" (reg0), [reg1] "=&d" (reg1.value),
+		  [reg2] "=&d" (reg2), [rp1] "+&d" (rp1.pair),
+		  [rp2] "+&d" (rp2.pair)
 		:
 		: "cc", "memory", "0", "1", "2");
 
@@ -415,27 +533,20 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
 		 * Signal the caller that this dqap is only partially received
 		 * with a special status response code 0xFF and *resgr0 updated
 		 */
-		reg1.response_code = 0xFF;
+		reg1.status.response_code = 0xFF;
 		if (resgr0)
 			*resgr0 = reg0;
 	} else {
-		*psmid = (((unsigned long long)rp1.even) << 32) + rp1.odd;
+		*psmid = (rp1.even << 32) + rp1.odd;
 		if (resgr0)
 			*resgr0 = 0;
 	}
 
-	return reg1;
-}
+	/* update *length with the nr of bytes stored into the msg buffer */
+	if (length)
+		*length = msglen - rp2.odd;
 
-/*
- * Interface to tell the AP bus code that a configuration
- * change has happened. The bus code should at least do
- * an ap bus resource rescan.
- */
-#if IS_ENABLED(CONFIG_ZCRYPT)
-void ap_bus_cfg_chg(void);
-#else
-static inline void ap_bus_cfg_chg(void){}
-#endif
+	return reg1.status;
+}
 
 #endif /* _ASM_S390_AP_H_ */
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index c5bd9f4437e5..99b2902c10fd 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -8,8 +8,9 @@
 #ifndef _ASM_S390_APPLDATA_H
 #define _ASM_S390_APPLDATA_H
 
+#include <linux/io.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
-#include <asm/io.h>
 
 #define APPLDATA_START_INTERVAL_REC	0x80
 #define APPLDATA_STOP_REC		0x81
@@ -48,19 +49,19 @@ static inline int appldata_asm(struct appldata_parameter_list *parm_list,
 {
 	int ry;
 
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return -EOPNOTSUPP;
 	parm_list->diag = 0xdc;
 	parm_list->function = fn;
 	parm_list->parlist_length = sizeof(*parm_list);
 	parm_list->buffer_length = length;
-	parm_list->product_id_addr = (unsigned long) id;
+	parm_list->product_id_addr = virt_to_phys(id);
 	parm_list->buffer_addr = virt_to_phys(buffer);
 	diag_stat_inc(DIAG_STAT_X0DC);
 	asm volatile(
 		"	diag	%1,%0,0xdc"
 		: "=d" (ry)
-		: "d" (parm_list), "m" (*parm_list), "m" (*id)
+		: "d" (virt_to_phys(parm_list)), "m" (*parm_list), "m" (*id)
 		: "cc");
 	return ry;
 }
diff --git a/arch/s390/include/asm/arch_hweight.h b/arch/s390/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..aca08b0acbc1
--- /dev/null
+++ b/arch/s390/include/asm/arch_hweight.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ARCH_HWEIGHT_H
+#define _ASM_S390_ARCH_HWEIGHT_H
+
+#include <linux/types.h>
+#include <asm/march.h>
+
+static __always_inline unsigned long popcnt_z196(unsigned long w)
+{
+	unsigned long cnt;
+
+	asm volatile(".insn	rrf,0xb9e10000,%[cnt],%[w],0,0"
+		     : [cnt] "=d" (cnt)
+		     : [w] "d" (w)
+		     : "cc");
+	return cnt;
+}
+
+static __always_inline unsigned long popcnt_z15(unsigned long w)
+{
+	unsigned long cnt;
+
+	asm volatile(".insn	rrf,0xb9e10000,%[cnt],%[w],8,0"
+		     : [cnt] "=d" (cnt)
+		     : [w] "d" (w)
+		     : "cc");
+	return cnt;
+}
+
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+	if (__is_defined(MARCH_HAS_Z15_FEATURES))
+		return popcnt_z15(w);
+	if (__is_defined(MARCH_HAS_Z196_FEATURES)) {
+		w = popcnt_z196(w);
+		w += w >> 32;
+		w += w >> 16;
+		w += w >> 8;
+		return w & 0xff;
+	}
+	return __sw_hweight64(w);
+}
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+	if (__is_defined(MARCH_HAS_Z15_FEATURES))
+		return popcnt_z15(w);
+	if (__is_defined(MARCH_HAS_Z196_FEATURES)) {
+		w = popcnt_z196(w);
+		w += w >> 16;
+		w += w >> 8;
+		return w & 0xff;
+	}
+	return __sw_hweight32(w);
+}
+
+static __always_inline unsigned int __arch_hweight16(unsigned int w)
+{
+	if (__is_defined(MARCH_HAS_Z15_FEATURES))
+		return popcnt_z15((unsigned short)w);
+	if (__is_defined(MARCH_HAS_Z196_FEATURES)) {
+		w = popcnt_z196(w);
+		w += w >> 8;
+		return w & 0xff;
+	}
+	return __sw_hweight16(w);
+}
+
+static __always_inline unsigned int __arch_hweight8(unsigned int w)
+{
+	if (__is_defined(MARCH_HAS_Z196_FEATURES))
+		return popcnt_z196((unsigned char)w);
+	return __sw_hweight8(w);
+}
+
+#endif /* _ASM_S390_ARCH_HWEIGHT_H */
diff --git a/arch/s390/include/asm/asce.h b/arch/s390/include/asm/asce.h
new file mode 100644
index 000000000000..f6dfaaba735a
--- /dev/null
+++ b/arch/s390/include/asm/asce.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ASCE_H
+#define _ASM_S390_ASCE_H
+
+#include <linux/thread_info.h>
+#include <linux/irqflags.h>
+#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+
+static inline bool enable_sacf_uaccess(void)
+{
+	unsigned long flags;
+
+	if (test_thread_flag(TIF_ASCE_PRIMARY))
+		return true;
+	local_irq_save(flags);
+	local_ctl_load(1, &get_lowcore()->kernel_asce);
+	set_thread_flag(TIF_ASCE_PRIMARY);
+	local_irq_restore(flags);
+	return false;
+}
+
+static inline void disable_sacf_uaccess(bool previous)
+{
+	unsigned long flags;
+
+	if (previous)
+		return;
+	local_irq_save(flags);
+	local_ctl_load(1, &get_lowcore()->user_asce);
+	clear_thread_flag(TIF_ASCE_PRIMARY);
+	local_irq_restore(flags);
+}
+
+#endif /* _ASM_S390_ASCE_H */
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index b74f1070ddb2..d23ea0c94e4e 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -9,9 +9,13 @@
 #define EX_TYPE_NONE		0
 #define EX_TYPE_FIXUP		1
 #define EX_TYPE_BPF		2
-#define EX_TYPE_UA_STORE	3
-#define EX_TYPE_UA_LOAD_MEM	4
+#define EX_TYPE_UA_FAULT	3
 #define EX_TYPE_UA_LOAD_REG	5
+#define EX_TYPE_UA_LOAD_REGPAIR	6
+#define EX_TYPE_ZEROPAD		7
+#define EX_TYPE_FPC		8
+#define EX_TYPE_UA_MVCOS_TO	9
+#define EX_TYPE_UA_MVCOS_FROM	10
 
 #define EX_DATA_REG_ERR_SHIFT	0
 #define EX_DATA_REG_ERR		GENMASK(3, 0)
@@ -22,18 +26,9 @@
 #define EX_DATA_LEN_SHIFT	8
 #define EX_DATA_LEN		GENMASK(11, 8)
 
-#define __EX_TABLE(_section, _fault, _target, _type)			\
-	stringify_in_c(.section	_section,"a";)				\
-	stringify_in_c(.align	4;)					\
-	stringify_in_c(.long	(_fault) - .;)				\
-	stringify_in_c(.long	(_target) - .;)				\
-	stringify_in_c(.short	(_type);)				\
-	stringify_in_c(.short	0;)					\
-	stringify_in_c(.previous)
-
-#define __EX_TABLE_UA(_section, _fault, _target, _type, _regerr, _regaddr, _len)\
+#define __EX_TABLE(_section, _fault, _target, _type, _regerr, _regaddr, _len)	\
 	stringify_in_c(.section _section,"a";)					\
-	stringify_in_c(.align	4;)						\
+	stringify_in_c(.balign	4;)						\
 	stringify_in_c(.long	(_fault) - .;)					\
 	stringify_in_c(.long	(_target) - .;)					\
 	stringify_in_c(.short	(_type);)					\
@@ -71,18 +66,30 @@
 	stringify_in_c(.previous)
 
 #define EX_TABLE(_fault, _target)					\
-	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP)
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
 
 #define EX_TABLE_AMODE31(_fault, _target)				\
-	__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP)
+	__EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0)
 
-#define EX_TABLE_UA_STORE(_fault, _target, _regerr)			\
-	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
-
-#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len)	\
-	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+#define EX_TABLE_UA_FAULT(_fault, _target, _regerr)			\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0)
 
 #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero)	\
-	__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero)	\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
+#define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr)		\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0)
+
+#define EX_TABLE_FPC(_fault, _target)					\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0)
+
+#define EX_TABLE_UA_MVCOS_TO(_fault, _target)				\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_TO, __stringify(%%r0), __stringify(%%r0), 0)
+
+#define EX_TABLE_UA_MVCOS_FROM(_fault, _target)				\
+	__EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_MVCOS_FROM, __stringify(%%r0), __stringify(%%r0), 0)
 
 #endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
index c37eb921bfbf..f662eb4b9246 100644
--- a/arch/s390/include/asm/asm-prototypes.h
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -3,7 +3,12 @@
 
 #include <linux/kvm_host.h>
 #include <linux/ftrace.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
+#include <asm/nospec-branch.h>
 #include <asm-generic/asm-prototypes.h>
 
+__int128_t __ashlti3(__int128_t a, int b);
+__int128_t __ashrti3(__int128_t a, int b);
+__int128_t __lshrti3(__int128_t a, int b);
+
 #endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h
new file mode 100644
index 000000000000..e9062b01e2a2
--- /dev/null
+++ b/arch/s390/include/asm/asm.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_H
+#define _ASM_S390_ASM_H
+
+#include <linux/stringify.h>
+
+/*
+ * Helper macros to be used for flag output operand handling.
+ * Inline assemblies must use four of the five supplied macros:
+ *
+ * Use CC_IPM(sym) at the end of the inline assembly; this extracts the
+ * condition code and program mask with the ipm instruction and writes it to
+ * the variable with symbolic name [sym] if the compiler has no support for
+ * flag output operands. If the compiler has support for flag output operands
+ * this generates no code.
+ *
+ * Use CC_OUT(sym, var) at the output operand list of an inline assembly. This
+ * defines an output operand with symbolic name [sym] for the variable
+ * [var]. [var] must be an int variable and [sym] must be identical with [sym]
+ * used with CC_IPM().
+ *
+ * Use either CC_CLOBBER or CC_CLOBBER_LIST() for the clobber list. Use
+ * CC_CLOBBER if the clobber list contains only "cc", otherwise use
+ * CC_CLOBBER_LIST() and add all clobbers as argument to the macro.
+ *
+ * Use CC_TRANSFORM() to convert the variable [var] which contains the
+ * extracted condition code. If the condition code is extracted with ipm, the
+ * [var] also contains the program mask. CC_TRANSFORM() moves the condition
+ * code to the two least significant bits and sets all other bits to zero.
+ */
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN))
+
+#define __HAVE_ASM_FLAG_OUTPUTS__
+
+#define CC_IPM(sym)
+#define CC_OUT(sym, var)	"=@cc" (var)
+#define CC_TRANSFORM(cc)	({ cc; })
+#define CC_CLOBBER
+#define CC_CLOBBER_LIST(...)	__VA_ARGS__
+
+#else
+
+#define CC_IPM(sym)		"	ipm	%[" __stringify(sym) "]\n"
+#define CC_OUT(sym, var)	[sym] "=d" (var)
+#define CC_TRANSFORM(cc)	({ (cc) >> 28; })
+#define CC_CLOBBER		"cc"
+#define CC_CLOBBER_LIST(...)	"cc", __VA_ARGS__
+
+#endif
+
+#endif /* _ASM_S390_ASM_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7138d189cc42..b36dd6a1d652 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,46 +15,76 @@
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
 
-static inline int arch_atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
 {
-	return __atomic_read(v);
+	return __atomic_read(&v->counter);
 }
 #define arch_atomic_read arch_atomic_read
 
-static inline void arch_atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
 {
-	__atomic_set(v, i);
+	__atomic_set(&v->counter, i);
 }
 #define arch_atomic_set arch_atomic_set
 
-static inline int arch_atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
 {
 	return __atomic_add_barrier(i, &v->counter) + i;
 }
 #define arch_atomic_add_return arch_atomic_add_return
 
-static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
 {
 	return __atomic_add_barrier(i, &v->counter);
 }
 #define arch_atomic_fetch_add arch_atomic_fetch_add
 
-static inline void arch_atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
 {
 	__atomic_add(i, &v->counter);
 }
 #define arch_atomic_add arch_atomic_add
 
+static __always_inline void arch_atomic_inc(atomic_t *v)
+{
+	__atomic_add_const(1, &v->counter);
+}
+#define arch_atomic_inc arch_atomic_inc
+
+static __always_inline void arch_atomic_dec(atomic_t *v)
+{
+	__atomic_add_const(-1, &v->counter);
+}
+#define arch_atomic_dec arch_atomic_dec
+
+static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
+{
+	return __atomic_add_and_test_barrier(-i, &v->counter);
+}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
+
+static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
+{
+	return __atomic_add_const_and_test_barrier(-1, &v->counter);
+}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
+
+static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
+{
+	return __atomic_add_const_and_test_barrier(1, &v->counter);
+}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
+
 #define arch_atomic_sub(_i, _v)		arch_atomic_add(-(int)(_i), _v)
 #define arch_atomic_sub_return(_i, _v)	arch_atomic_add_return(-(int)(_i), _v)
 #define arch_atomic_fetch_sub(_i, _v)	arch_atomic_fetch_add(-(int)(_i), _v)
 
 #define ATOMIC_OPS(op)							\
-static inline void arch_atomic_##op(int i, atomic_t *v)			\
+static __always_inline void arch_atomic_##op(int i, atomic_t *v)	\
 {									\
 	__atomic_##op(i, &v->counter);					\
 }									\
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
+static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v)	\
 {									\
 	return __atomic_##op##_barrier(i, &v->counter);			\
 }
@@ -72,62 +102,112 @@ ATOMIC_OPS(xor)
 #define arch_atomic_fetch_or		arch_atomic_fetch_or
 #define arch_atomic_fetch_xor		arch_atomic_fetch_xor
 
-#define arch_atomic_xchg(v, new)	(arch_xchg(&((v)->counter), new))
+static __always_inline int arch_atomic_xchg(atomic_t *v, int new)
+{
+	return arch_xchg(&v->counter, new);
+}
+#define arch_atomic_xchg arch_atomic_xchg
 
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
-	return __atomic_cmpxchg(&v->counter, old, new);
+	return arch_cmpxchg(&v->counter, old, new);
 }
 #define arch_atomic_cmpxchg arch_atomic_cmpxchg
 
+static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+	return arch_try_cmpxchg(&v->counter, old, new);
+}
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
+
 #define ATOMIC64_INIT(i)  { (i) }
 
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
 {
-	return __atomic64_read(v);
+	return __atomic64_read((long *)&v->counter);
 }
 #define arch_atomic64_read arch_atomic64_read
 
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
-	__atomic64_set(v, i);
+	__atomic64_set((long *)&v->counter, i);
 }
 #define arch_atomic64_set arch_atomic64_set
 
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
 {
 	return __atomic64_add_barrier(i, (long *)&v->counter) + i;
 }
 #define arch_atomic64_add_return arch_atomic64_add_return
 
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
 {
 	return __atomic64_add_barrier(i, (long *)&v->counter);
 }
 #define arch_atomic64_fetch_add arch_atomic64_fetch_add
 
-static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
 {
 	__atomic64_add(i, (long *)&v->counter);
 }
 #define arch_atomic64_add arch_atomic64_add
 
-#define arch_atomic64_xchg(v, new)	(arch_xchg(&((v)->counter), new))
+static __always_inline void arch_atomic64_inc(atomic64_t *v)
+{
+	__atomic64_add_const(1, (long *)&v->counter);
+}
+#define arch_atomic64_inc arch_atomic64_inc
+
+static __always_inline void arch_atomic64_dec(atomic64_t *v)
+{
+	__atomic64_add_const(-1, (long *)&v->counter);
+}
+#define arch_atomic64_dec arch_atomic64_dec
 
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
-	return __atomic64_cmpxchg((long *)&v->counter, old, new);
+	return __atomic64_add_and_test_barrier(-i, (long *)&v->counter);
+}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
+
+static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
+{
+	return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter);
+}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
+
+static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
+{
+	return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter);
+}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
+
+static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
+{
+	return arch_xchg(&v->counter, new);
+}
+#define arch_atomic64_xchg arch_atomic64_xchg
+
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+{
+	return arch_cmpxchg(&v->counter, old, new);
 }
 #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
 
-#define ATOMIC64_OPS(op)						\
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v)		\
-{									\
-	__atomic64_##op(i, (long *)&v->counter);			\
-}									\
-static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v)	\
-{									\
-	return __atomic64_##op##_barrier(i, (long *)&v->counter);	\
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+	return arch_try_cmpxchg(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
+#define ATOMIC64_OPS(op)							\
+static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v)		\
+{										\
+	__atomic64_##op(i, (long *)&v->counter);				\
+}										\
+static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v)	\
+{										\
+	return __atomic64_##op##_barrier(i, (long *)&v->counter);		\
 }
 
 ATOMIC64_OPS(and)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 50510e08b893..21c26d842832 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,44 +8,60 @@
 #ifndef __ARCH_S390_ATOMIC_OPS__
 #define __ARCH_S390_ATOMIC_OPS__
 
-static inline int __atomic_read(const atomic_t *v)
+#include <linux/limits.h>
+#include <asm/march.h>
+#include <asm/asm.h>
+
+static __always_inline int __atomic_read(const int *ptr)
 {
-	int c;
+	int val;
 
 	asm volatile(
-		"	l	%0,%1\n"
-		: "=d" (c) : "R" (v->counter));
-	return c;
+		"	l	%[val],%[ptr]\n"
+		: [val] "=d" (val) : [ptr] "R" (*ptr));
+	return val;
 }
 
-static inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(int *ptr, int val)
 {
-	asm volatile(
-		"	st	%1,%0\n"
-		: "=R" (v->counter) : "d" (i));
+	if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
+		asm volatile(
+			"	mvhi	%[ptr],%[val]\n"
+			: [ptr] "=Q" (*ptr) : [val] "K" (val));
+	} else {
+		asm volatile(
+			"	st	%[val],%[ptr]\n"
+			: [ptr] "=R" (*ptr) : [val] "d" (val));
+	}
 }
 
-static inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline long __atomic64_read(const long *ptr)
 {
-	s64 c;
+	long val;
 
 	asm volatile(
-		"	lg	%0,%1\n"
-		: "=d" (c) : "RT" (v->counter));
-	return c;
+		"	lg	%[val],%[ptr]\n"
+		: [val] "=d" (val) : [ptr] "RT" (*ptr));
+	return val;
 }
 
-static inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(long *ptr, long val)
 {
-	asm volatile(
-		"	stg	%1,%0\n"
-		: "=RT" (v->counter) : "d" (i));
+	if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) {
+		asm volatile(
+			"	mvghi	%[ptr],%[val]\n"
+			: [ptr] "=Q" (*ptr) : [val] "K" (val));
+	} else {
+		asm volatile(
+			"	stg	%[val],%[ptr]\n"
+			: [ptr] "=RT" (*ptr) : [val] "d" (val));
+	}
 }
 
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
 
 #define __ATOMIC_OP(op_name, op_type, op_string, op_barrier)		\
-static inline op_type op_name(op_type val, op_type *ptr)		\
+static __always_inline op_type op_name(op_type val, op_type *ptr)	\
 {									\
 	op_type old;							\
 									\
@@ -58,7 +74,7 @@ static inline op_type op_name(op_type val, op_type *ptr)		\
 }									\
 
 #define __ATOMIC_OPS(op_name, op_type, op_string)			\
-	__ATOMIC_OP(op_name, op_type, op_string, "\n")			\
+	__ATOMIC_OP(op_name, op_type, op_string, "")			\
 	__ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
 
 __ATOMIC_OPS(__atomic_add, int, "laa")
@@ -84,7 +100,7 @@ static __always_inline void op_name(op_type val, op_type *ptr)		\
 }
 
 #define __ATOMIC_CONST_OPS(op_name, op_type, op_string)			\
-	__ATOMIC_CONST_OP(op_name, op_type, op_string, "\n")		\
+	__ATOMIC_CONST_OP(op_name, op_type, op_string, "")		\
 	__ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
 
 __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi")
@@ -93,10 +109,10 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
 #undef __ATOMIC_CONST_OPS
 #undef __ATOMIC_CONST_OP
 
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#else /* MARCH_HAS_Z196_FEATURES */
 
 #define __ATOMIC_OP(op_name, op_string)					\
-static inline int op_name(int val, int *ptr)				\
+static __always_inline int op_name(int val, int *ptr)			\
 {									\
 	int old, new;							\
 									\
@@ -122,7 +138,7 @@ __ATOMIC_OPS(__atomic_xor, "xr")
 #undef __ATOMIC_OPS
 
 #define __ATOMIC64_OP(op_name, op_string)				\
-static inline long op_name(long val, long *ptr)				\
+static __always_inline long op_name(long val, long *ptr)		\
 {									\
 	long old, new;							\
 									\
@@ -147,55 +163,83 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
 
 #undef __ATOMIC64_OPS
 
-#define __atomic_add_const(val, ptr)		__atomic_add(val, ptr)
-#define __atomic_add_const_barrier(val, ptr)	__atomic_add(val, ptr)
-#define __atomic64_add_const(val, ptr)		__atomic64_add(val, ptr)
-#define __atomic64_add_const_barrier(val, ptr)	__atomic64_add(val, ptr)
+#define __atomic_add_const(val, ptr)		((void)__atomic_add(val, ptr))
+#define __atomic_add_const_barrier(val, ptr)	((void)__atomic_add(val, ptr))
+#define __atomic64_add_const(val, ptr)		((void)__atomic64_add(val, ptr))
+#define __atomic64_add_const_barrier(val, ptr)	((void)__atomic64_add(val, ptr))
 
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#endif /* MARCH_HAS_Z196_FEATURES */
 
-static inline int __atomic_cmpxchg(int *ptr, int old, int new)
-{
-	asm volatile(
-		"	cs	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+Q" (*ptr)
-		: [new] "d" (new)
-		: "cc", "memory");
-	return old;
-}
+#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__)
 
-static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
-{
-	int old_expected = old;
+#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier)	\
+static __always_inline bool op_name(op_type val, op_type *ptr)		\
+{									\
+	op_type tmp;							\
+	int cc;								\
+									\
+	asm volatile(							\
+		op_string "	%[tmp],%[val],%[ptr]\n"			\
+		op_barrier						\
+		: "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr)	\
+		: [val] "d" (val)					\
+		: "memory");						\
+	return (cc == 0) || (cc == 2);					\
+}									\
 
-	asm volatile(
-		"	cs	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+Q" (*ptr)
-		: [new] "d" (new)
-		: "cc", "memory");
-	return old == old_expected;
-}
+#define __ATOMIC_TEST_OPS(op_name, op_type, op_string)			\
+	__ATOMIC_TEST_OP(op_name, op_type, op_string, "")		\
+	__ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
 
-static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
-{
-	asm volatile(
-		"	csg	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+QS" (*ptr)
-		: [new] "d" (new)
-		: "cc", "memory");
-	return old;
+__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal")
+__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg")
+
+#undef __ATOMIC_TEST_OPS
+#undef __ATOMIC_TEST_OP
+
+#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier)	\
+static __always_inline bool op_name(op_type val, op_type *ptr)		\
+{									\
+	int cc;								\
+									\
+	asm volatile(							\
+		op_string "	%[ptr],%[val]\n"			\
+		op_barrier						\
+		: "=@cc" (cc), [ptr] "+QS" (*ptr)			\
+		: [val] "i" (val)					\
+		: "memory");						\
+	return (cc == 0) || (cc == 2);					\
 }
 
-static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
-{
-	long old_expected = old;
+#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string)		\
+	__ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "")		\
+	__ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
 
-	asm volatile(
-		"	csg	%[old],%[new],%[ptr]"
-		: [old] "+d" (old), [ptr] "+QS" (*ptr)
-		: [new] "d" (new)
-		: "cc", "memory");
-	return old == old_expected;
+__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi")
+__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi")
+
+#undef __ATOMIC_CONST_TEST_OPS
+#undef __ATOMIC_CONST_TEST_OP
+
+#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
+
+#define __ATOMIC_TEST_OP(op_name, op_func, op_type)			\
+static __always_inline bool op_name(op_type val, op_type *ptr)		\
+{									\
+	return op_func(val, ptr) == -val;				\
 }
 
+__ATOMIC_TEST_OP(__atomic_add_and_test,			__atomic_add,		int)
+__ATOMIC_TEST_OP(__atomic_add_and_test_barrier,		__atomic_add_barrier,	int)
+__ATOMIC_TEST_OP(__atomic_add_const_and_test,		__atomic_add,		int)
+__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier,	__atomic_add_barrier,	int)
+__ATOMIC_TEST_OP(__atomic64_add_and_test,		__atomic64_add,		long)
+__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier,	__atomic64_add_barrier, long)
+__ATOMIC_TEST_OP(__atomic64_add_const_and_test,		__atomic64_add,		long)
+__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier,	__atomic64_add_barrier,	long)
+
+#undef __ATOMIC_TEST_OP
+
+#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */
+
 #endif /* __ARCH_S390_ATOMIC_OPS__  */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 82de2a7c4160..d82130d7f2b6 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -8,13 +8,15 @@
 #ifndef __ASM_BARRIER_H
 #define __ASM_BARRIER_H
 
+#include <asm/march.h>
+
 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking
  * to devices.
  */
 
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
 /* Fast-BCR without checkpoint synchronization */
 #define __ASM_BCR_SERIALIZE "bcr 14,0\n"
 #else
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 191dc7898b0f..a5ca0a947691 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -36,179 +36,45 @@
 #include <linux/typecheck.h>
 #include <linux/compiler.h>
 #include <linux/types.h>
-#include <asm/atomic_ops.h>
-#include <asm/barrier.h>
-
-#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
-
-static inline unsigned long *
-__bitops_word(unsigned long nr, const volatile unsigned long *ptr)
-{
-	unsigned long addr;
-
-	addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3);
-	return (unsigned long *)addr;
-}
-
-static inline unsigned long __bitops_mask(unsigned long nr)
-{
-	return 1UL << (nr & (BITS_PER_LONG - 1));
-}
-
-static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	__atomic64_or(mask, (long *)addr);
-}
-
-static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	__atomic64_and(~mask, (long *)addr);
-}
-
-static __always_inline void arch_change_bit(unsigned long nr,
-					    volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	__atomic64_xor(mask, (long *)addr);
-}
-
-static inline bool arch_test_and_set_bit(unsigned long nr,
-					 volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-	unsigned long old;
-
-	old = __atomic64_or_barrier(mask, (long *)addr);
-	return old & mask;
-}
-
-static inline bool arch_test_and_clear_bit(unsigned long nr,
-					   volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-	unsigned long old;
-
-	old = __atomic64_and_barrier(~mask, (long *)addr);
-	return old & mask;
-}
-
-static inline bool arch_test_and_change_bit(unsigned long nr,
-					    volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-	unsigned long old;
-
-	old = __atomic64_xor_barrier(mask, (long *)addr);
-	return old & mask;
-}
-
-static inline void arch___set_bit(unsigned long nr, volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	*addr |= mask;
-}
-
-static inline void arch___clear_bit(unsigned long nr,
-				    volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	*addr &= ~mask;
-}
-
-static inline void arch___change_bit(unsigned long nr,
-				     volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	*addr ^= mask;
-}
-
-static inline bool arch___test_and_set_bit(unsigned long nr,
-					   volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-	unsigned long old;
-
-	old = *addr;
-	*addr |= mask;
-	return old & mask;
-}
-
-static inline bool arch___test_and_clear_bit(unsigned long nr,
-					     volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-	unsigned long old;
-
-	old = *addr;
-	*addr &= ~mask;
-	return old & mask;
-}
-
-static inline bool arch___test_and_change_bit(unsigned long nr,
-					      volatile unsigned long *ptr)
-{
-	unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-	unsigned long old;
-
-	old = *addr;
-	*addr ^= mask;
-	return old & mask;
-}
-
-static inline bool arch_test_bit(unsigned long nr,
-				 const volatile unsigned long *ptr)
-{
-	const volatile unsigned long *addr = __bitops_word(nr, ptr);
-	unsigned long mask = __bitops_mask(nr);
-
-	return *addr & mask;
-}
-
-static inline bool arch_test_and_set_bit_lock(unsigned long nr,
-					      volatile unsigned long *ptr)
-{
-	if (arch_test_bit(nr, ptr))
-		return true;
-	return arch_test_and_set_bit(nr, ptr);
-}
-
-static inline void arch_clear_bit_unlock(unsigned long nr,
-					 volatile unsigned long *ptr)
-{
-	smp_mb__before_atomic();
-	arch_clear_bit(nr, ptr);
-}
-
-static inline void arch___clear_bit_unlock(unsigned long nr,
-					   volatile unsigned long *ptr)
-{
-	smp_mb();
-	arch___clear_bit(nr, ptr);
+#include <asm/asm.h>
+
+#define arch___set_bit			generic___set_bit
+#define arch___clear_bit		generic___clear_bit
+#define arch___change_bit		generic___change_bit
+#define arch___test_and_set_bit		generic___test_and_set_bit
+#define arch___test_and_clear_bit	generic___test_and_clear_bit
+#define arch___test_and_change_bit	generic___test_and_change_bit
+#define arch_test_bit_acquire		generic_test_bit_acquire
+
+static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr)
+{
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+	const volatile unsigned char *addr;
+	unsigned long mask;
+	int cc;
+
+	/*
+	 * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to
+	 * handle __builtin_constant_p() in some cases.
+	 */
+	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) {
+		addr = (const volatile unsigned char *)ptr;
+		addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE;
+		mask = 1UL << (nr & (BITS_PER_BYTE - 1));
+		asm volatile(
+			"	tm	%[addr],%[mask]\n"
+			: "=@cc" (cc)
+			: [addr] "Q" (*addr), [mask] "I" (mask)
+			);
+		return cc == 3;
+	}
+#endif
+	return generic_test_bit(nr, ptr);
 }
 
-#include <asm-generic/bitops/instrumented-atomic.h>
-#include <asm-generic/bitops/instrumented-non-atomic.h>
-#include <asm-generic/bitops/instrumented-lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-instrumented-non-atomic.h>
+#include <asm-generic/bitops/lock.h>
 
 /*
  * Functions which use MSB0 bit numbering.
@@ -374,8 +240,9 @@ static inline int fls(unsigned int word)
 	return fls64(word);
 }
 
+#include <asm/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index f7eed27b3220..f55f8227058e 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_S390_BOOT_DATA_H
 
+#include <linux/string.h>
 #include <asm/setup.h>
 #include <asm/ipl.h>
 
@@ -15,4 +16,54 @@ extern unsigned long ipl_cert_list_size;
 extern unsigned long early_ipl_comp_list_addr;
 extern unsigned long early_ipl_comp_list_size;
 
+extern char boot_rb[PAGE_SIZE * 2];
+extern bool boot_earlyprintk;
+extern size_t boot_rb_off;
+extern char bootdebug_filter[128];
+extern bool bootdebug;
+
+#define boot_rb_foreach(cb)							\
+	do {									\
+		size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1;	\
+		size_t len;							\
+		for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \
+			cb(boot_rb + off);					\
+		for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \
+			cb(boot_rb + off);					\
+	} while (0)
+
+/*
+ * bootdebug_filter is a comma separated list of message prefixes. Empty
+ * entries are ignored; a filter without any entry matches every message.
+ */
+static inline bool bootdebug_filter_match(const char *buf)
+{
+	char *p = bootdebug_filter, *s, *end;
+	bool has_entry = false;
+
+	end = p + strlen(p);
+	while (p < end) {
+		p = skip_spaces(p);
+		s = memscan(p, ',', end - p);
+		if (s > p) {
+			if (!strncmp(p, buf, s - p))
+				return true;
+			has_entry = true;
+		}
+		p = s + 1;
+	}
+	return !has_entry;
+}
+
+static inline const char *skip_timestamp(const char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+	const char *p = memchr(buf, ']', strlen(buf));
+
+	if (p && p[1] == ' ')
+		return p + 2;
+#endif
+	return buf;
+}
+
 #endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index aebe1e22c7be..c500d45fb465 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -14,7 +14,7 @@
 		".section .rodata.str,\"aMS\",@progbits,1\n"	\
 		"1:	.asciz	\""__FILE__"\"\n"		\
 		".previous\n"					\
-		".section __bug_table,\"awM\",@progbits,%2\n"	\
+		".section __bug_table,\"aw\"\n"			\
 		"2:	.long	0b-.\n"				\
 		"	.long	1b-.\n"				\
 		"	.short	%0,%1\n"			\
@@ -30,7 +30,7 @@
 #define __EMIT_BUG(x) do {					\
 	asm_inline volatile(					\
 		"0:	mc	0,0\n"				\
-		".section __bug_table,\"awM\",@progbits,%1\n"	\
+		".section __bug_table,\"aw\"\n"			\
 		"1:	.long	0b-.\n"				\
 		"	.short	%0\n"				\
 		"	.org	1b+%1\n"			\
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
deleted file mode 100644
index aa42a179be33..000000000000
--- a/arch/s390/include/asm/bugs.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  S390 version
- *    Copyright IBM Corp. 1999
- *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- *  Derived from "include/asm-i386/bugs.h"
- *    Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *      void check_bugs(void);
- */
-
-static inline void check_bugs(void)
-{
-  /* s390 has no bugs ... */
-}
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index d4e90f2ba77e..e3afcece375e 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -15,6 +15,7 @@
 #include <asm/fcx.h>
 #include <asm/irq.h>
 #include <asm/schid.h>
+#include <linux/mutex.h>
 
 /* structs from asm/cio.h */
 struct irb;
@@ -87,6 +88,7 @@ struct ccw_device {
 	spinlock_t *ccwlock;
 /* private: */
 	struct ccw_device_private *private;	/* cio private information */
+	struct mutex reg_mutex;
 /* public: */
 	struct ccw_device_id id;
 	struct ccw_driver *drv;
@@ -208,15 +210,15 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
 #define get_ccwdev_lock(x) (x)->ccwlock
 
 #define to_ccwdev(n) container_of(n, struct ccw_device, dev)
-#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+#define to_ccwdrv(n) container_of_const(n, struct ccw_driver, driver)
 
 extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
 extern void ccw_device_destroy_console(struct ccw_device *);
 extern int ccw_device_enable_console(struct ccw_device *);
 extern void ccw_device_wait_idle(struct ccw_device *);
-extern int ccw_device_force_console(struct ccw_device *);
 
-extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size,
+				   dma32_t *dma_handle);
 extern void ccw_device_dma_free(struct ccw_device *cdev,
 				void *cpu_addr, size_t size);
 
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index cdd19d326345..d86dea5900e7 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -12,28 +12,19 @@
 #ifndef _S390_CHECKSUM_H
 #define _S390_CHECKSUM_H
 
-#include <linux/uaccess.h>
+#include <linux/instrumented.h>
+#include <linux/kmsan-checks.h>
 #include <linux/in6.h>
 
-/*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
- *
- * Returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic.
- *
- * This function must be called with even lengths, except
- * for the last fragment, which may be odd.
- *
- * It's best to have buff aligned on a 32-bit boundary.
- */
-static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
+static inline __wsum cksm(const void *buff, int len, __wsum sum)
 {
 	union register_pair rp = {
-		.even = (unsigned long) buff,
-		.odd = (unsigned long) len,
+		.even = (unsigned long)buff,
+		.odd = (unsigned long)len,
 	};
 
+	instrument_read(buff, len);
+	kmsan_check_memory(buff, len);
 	asm volatile(
 		"0:	cksm	%[sum],%[rp]\n"
 		"	jo	0b\n"
@@ -41,6 +32,11 @@ static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
 	return sum;
 }
 
+__wsum csum_partial(const void *buff, int len, __wsum sum);
+
+#define _HAVE_ARCH_CSUM_AND_COPY
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
+
 /*
  * Fold a partial checksum without adding pseudo headers.
  */
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index bb48ea380c0d..bb78159d8042 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -11,6 +11,9 @@
 
 #include <uapi/asm/chsc.h>
 
+/* struct from linux/notifier.h */
+struct notifier_block;
+
 /**
  * Operation codes for CHSC PNSO:
  *    PNSO_OC_NET_BRIDGE_INFO - only addresses that are visible to a bridgeport
@@ -66,4 +69,16 @@ struct chsc_pnso_area {
 	struct chsc_pnso_naid_l2 entries[];
 } __packed __aligned(PAGE_SIZE);
 
+/*
+ * notifier interface - registered notifiers get called on
+ * the following events:
+ * - ap config changed (CHSC_NOTIFY_AP_CFG)
+ */
+enum chsc_notify_type {
+	CHSC_NOTIFY_AP_CFG = 3,
+};
+
+int chsc_notifier_register(struct notifier_block *nb);
+int chsc_notifier_unregister(struct notifier_block *nb);
+
 #endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1c4f585dd39b..b6b619f340a5 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -7,6 +7,7 @@
 
 #include <linux/bitops.h>
 #include <linux/genalloc.h>
+#include <asm/dma-types.h>
 #include <asm/types.h>
 #include <asm/tpi.h>
 
@@ -32,7 +33,7 @@ struct ccw1 {
 	__u8  cmd_code;
 	__u8  flags;
 	__u16 count;
-	__u32 cda;
+	dma32_t cda;
 } __attribute__ ((packed,aligned(8)));
 
 /**
@@ -152,8 +153,8 @@ struct sublog {
 struct esw0 {
 	struct sublog sublog;
 	struct erw erw;
-	__u32  faddr[2];
-	__u32  saddr;
+	dma32_t faddr[2];
+	dma32_t saddr;
 } __attribute__ ((packed));
 
 /**
@@ -364,6 +365,8 @@ extern struct device *cio_get_dma_css_dev(void);
 
 void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
 			size_t size);
+void *__cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+			  size_t size, dma32_t *dma_handle);
 void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
 void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
 struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 84c3f0d576c5..a9e2006033b7 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -11,194 +11,263 @@
 #include <linux/mmdebug.h>
 #include <linux/types.h>
 #include <linux/bug.h>
+#include <asm/asm.h>
 
-void __xchg_called_with_bad_pointer(void);
+void __cmpxchg_called_with_bad_pointer(void);
 
-static __always_inline unsigned long __xchg(unsigned long x,
-					    unsigned long address, int size)
+static __always_inline u32 __cs_asm(u64 ptr, u32 old, u32 new)
 {
-	unsigned long old;
-	int shift;
+	asm volatile(
+		"	cs	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+Q" (*(u32 *)ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
+
+static __always_inline u64 __csg_asm(u64 ptr, u64 old, u64 new)
+{
+	asm volatile(
+		"	csg	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+QS" (*(u64 *)ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
+
+static inline u8 __arch_cmpxchg1(u64 ptr, u8 old, u8 new)
+{
+	union {
+		u8 b[4];
+		u32 w;
+	} old32, new32;
+	u32 prev;
+	int i;
+
+	i = ptr & 3;
+	ptr &= ~0x3;
+	prev = READ_ONCE(*(u32 *)ptr);
+	do {
+		old32.w = prev;
+		if (old32.b[i] != old)
+			return old32.b[i];
+		new32.w = old32.w;
+		new32.b[i] = new;
+		prev = __cs_asm(ptr, old32.w, new32.w);
+	} while (prev != old32.w);
+	return old;
+}
+
+static inline u16 __arch_cmpxchg2(u64 ptr, u16 old, u16 new)
+{
+	union {
+		u16 b[2];
+		u32 w;
+	} old32, new32;
+	u32 prev;
+	int i;
+
+	i = (ptr & 3) >> 1;
+	ptr &= ~0x3;
+	prev = READ_ONCE(*(u32 *)ptr);
+	do {
+		old32.w = prev;
+		if (old32.b[i] != old)
+			return old32.b[i];
+		new32.w = old32.w;
+		new32.b[i] = new;
+		prev = __cs_asm(ptr, old32.w, new32.w);
+	} while (prev != old32.w);
+	return old;
+}
 
+static __always_inline u64 __arch_cmpxchg(u64 ptr, u64 old, u64 new, int size)
+{
 	switch (size) {
-	case 1:
-		shift = (3 ^ (address & 3)) << 3;
-		address ^= address & 3;
-		asm volatile(
-			"       l       %0,%1\n"
-			"0:     lr      0,%0\n"
-			"       nr      0,%3\n"
-			"       or      0,%2\n"
-			"       cs      %0,0,%1\n"
-			"       jl      0b\n"
-			: "=&d" (old), "+Q" (*(int *) address)
-			: "d" ((x & 0xff) << shift), "d" (~(0xff << shift))
-			: "memory", "cc", "0");
-		return old >> shift;
-	case 2:
-		shift = (2 ^ (address & 2)) << 3;
-		address ^= address & 2;
-		asm volatile(
-			"       l       %0,%1\n"
-			"0:     lr      0,%0\n"
-			"       nr      0,%3\n"
-			"       or      0,%2\n"
-			"       cs      %0,0,%1\n"
-			"       jl      0b\n"
-			: "=&d" (old), "+Q" (*(int *) address)
-			: "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift))
-			: "memory", "cc", "0");
-		return old >> shift;
-	case 4:
-		asm volatile(
-			"       l       %0,%1\n"
-			"0:     cs      %0,%2,%1\n"
-			"       jl      0b\n"
-			: "=&d" (old), "+Q" (*(int *) address)
-			: "d" (x)
-			: "memory", "cc");
-		return old;
-	case 8:
-		asm volatile(
-			"       lg      %0,%1\n"
-			"0:     csg     %0,%2,%1\n"
-			"       jl      0b\n"
-			: "=&d" (old), "+QS" (*(long *) address)
-			: "d" (x)
-			: "memory", "cc");
-		return old;
+	case 1:	 return __arch_cmpxchg1(ptr, old & 0xff, new & 0xff);
+	case 2:  return __arch_cmpxchg2(ptr, old & 0xffff, new & 0xffff);
+	case 4:  return __cs_asm(ptr, old & 0xffffffff, new & 0xffffffff);
+	case 8:  return __csg_asm(ptr, old, new);
+	default: __cmpxchg_called_with_bad_pointer();
 	}
-	__xchg_called_with_bad_pointer();
-	return x;
+	return old;
 }
 
-#define arch_xchg(ptr, x)						\
+#define arch_cmpxchg(ptr, o, n)						\
 ({									\
-	__typeof__(*(ptr)) __ret;					\
+	(__typeof__(*(ptr)))__arch_cmpxchg((unsigned long)(ptr),	\
+					   (unsigned long)(o),		\
+					   (unsigned long)(n),		\
+					   sizeof(*(ptr)));		\
+})
+
+#define arch_cmpxchg64		arch_cmpxchg
+#define arch_cmpxchg_local	arch_cmpxchg
+#define arch_cmpxchg64_local	arch_cmpxchg
+
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+
+#define arch_try_cmpxchg(ptr, oldp, new)				\
+({									\
+	__typeof__(ptr) __oldp = (__typeof__(ptr))(oldp);		\
+	__typeof__(*(ptr)) __old = *__oldp;				\
+	__typeof__(*(ptr)) __new = (new);				\
+	__typeof__(*(ptr)) __prev;					\
+	int __cc;							\
 									\
-	__ret = (__typeof__(*(ptr)))					\
-		__xchg((unsigned long)(x), (unsigned long)(ptr),	\
-		       sizeof(*(ptr)));					\
-	__ret;								\
+	switch (sizeof(*(ptr))) {					\
+	case 1:								\
+	case 2: {							\
+		__prev = arch_cmpxchg((ptr), (__old), (__new));		\
+		__cc = (__prev != __old);				\
+		if (unlikely(__cc))					\
+			*__oldp = __prev;				\
+		break;							\
+	}								\
+	case 4:	{							\
+		asm volatile(						\
+			"	cs	%[__old],%[__new],%[__ptr]\n"	\
+			: [__old] "+d" (*__oldp),			\
+			  [__ptr] "+Q" (*(ptr)),			\
+			  "=@cc" (__cc)					\
+			: [__new] "d" (__new)				\
+			: "memory");					\
+		break;							\
+	}								\
+	case 8:	{							\
+		 asm volatile(						\
+			 "	csg	%[__old],%[__new],%[__ptr]\n"	\
+			 : [__old] "+d" (*__oldp),			\
+			   [__ptr] "+QS" (*(ptr)),			\
+			   "=@cc" (__cc)				\
+			 : [__new] "d" (__new)				\
+			 : "memory");					\
+		 break;							\
+	}								\
+	default:							\
+		__cmpxchg_called_with_bad_pointer();			\
+	}								\
+	likely(__cc == 0);						\
 })
 
-void __cmpxchg_called_with_bad_pointer(void);
+#else /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
+#define arch_try_cmpxchg(ptr, oldp, new)				\
+({									\
+	__typeof__((ptr)) __oldp = (__typeof__(ptr))(oldp);		\
+	__typeof__(*(ptr)) __old = *__oldp;				\
+	__typeof__(*(ptr)) __new = (new);				\
+	__typeof__(*(ptr)) __prev;					\
+									\
+	__prev = arch_cmpxchg((ptr), (__old), (__new));			\
+	if (unlikely(__prev != __old))					\
+		*__oldp = __prev;					\
+	likely(__prev == __old);					\
+})
+
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
+#define arch_try_cmpxchg64		arch_try_cmpxchg
+#define arch_try_cmpxchg_local		arch_try_cmpxchg
+#define arch_try_cmpxchg64_local	arch_try_cmpxchg
+
+void __xchg_called_with_bad_pointer(void);
+
+static inline u8 __arch_xchg1(u64 ptr, u8 x)
+{
+	int shift = (3 ^ (ptr & 3)) << 3;
+	u32 mask, old, new;
+
+	ptr &= ~0x3;
+	mask = ~(0xff << shift);
+	old = READ_ONCE(*(u32 *)ptr);
+	do {
+		new = old & mask;
+		new |= x << shift;
+	} while (!arch_try_cmpxchg((u32 *)ptr, &old, new));
+	return old >> shift;
+}
 
-static __always_inline unsigned long __cmpxchg(unsigned long address,
-					       unsigned long old,
-					       unsigned long new, int size)
+static inline u16 __arch_xchg2(u64 ptr, u16 x)
 {
-	unsigned long prev, tmp;
-	int shift;
+	int shift = (2 ^ (ptr & 2)) << 3;
+	u32 mask, old, new;
 
+	ptr &= ~0x3;
+	mask = ~(0xffff << shift);
+	old = READ_ONCE(*(u32 *)ptr);
+	do {
+		new = old & mask;
+		new |= x << shift;
+	} while (!arch_try_cmpxchg((u32 *)ptr, &old, new));
+	return old >> shift;
+}
+
+static __always_inline u64 __arch_xchg(u64 ptr, u64 x, int size)
+{
 	switch (size) {
 	case 1:
-		shift = (3 ^ (address & 3)) << 3;
-		address ^= address & 3;
-		asm volatile(
-			"       l       %0,%2\n"
-			"0:     nr      %0,%5\n"
-			"       lr      %1,%0\n"
-			"       or      %0,%3\n"
-			"       or      %1,%4\n"
-			"       cs      %0,%1,%2\n"
-			"       jnl     1f\n"
-			"       xr      %1,%0\n"
-			"       nr      %1,%5\n"
-			"       jnz     0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
-			: "d" ((old & 0xff) << shift),
-			  "d" ((new & 0xff) << shift),
-			  "d" (~(0xff << shift))
-			: "memory", "cc");
-		return prev >> shift;
+		return __arch_xchg1(ptr, x & 0xff);
 	case 2:
-		shift = (2 ^ (address & 2)) << 3;
-		address ^= address & 2;
-		asm volatile(
-			"       l       %0,%2\n"
-			"0:     nr      %0,%5\n"
-			"       lr      %1,%0\n"
-			"       or      %0,%3\n"
-			"       or      %1,%4\n"
-			"       cs      %0,%1,%2\n"
-			"       jnl     1f\n"
-			"       xr      %1,%0\n"
-			"       nr      %1,%5\n"
-			"       jnz     0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
-			: "d" ((old & 0xffff) << shift),
-			  "d" ((new & 0xffff) << shift),
-			  "d" (~(0xffff << shift))
-			: "memory", "cc");
-		return prev >> shift;
-	case 4:
-		asm volatile(
-			"       cs      %0,%3,%1\n"
-			: "=&d" (prev), "+Q" (*(int *) address)
-			: "0" (old), "d" (new)
-			: "memory", "cc");
-		return prev;
-	case 8:
-		asm volatile(
-			"       csg     %0,%3,%1\n"
-			: "=&d" (prev), "+QS" (*(long *) address)
-			: "0" (old), "d" (new)
-			: "memory", "cc");
-		return prev;
+		return __arch_xchg2(ptr, x & 0xffff);
+	case 4: {
+		u32 old = READ_ONCE(*(u32 *)ptr);
+
+		do {
+		} while (!arch_try_cmpxchg((u32 *)ptr, &old, x & 0xffffffff));
+		return old;
 	}
-	__cmpxchg_called_with_bad_pointer();
-	return old;
+	case 8: {
+		u64 old = READ_ONCE(*(u64 *)ptr);
+
+		do {
+		} while (!arch_try_cmpxchg((u64 *)ptr, &old, x));
+		return old;
+	}
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
 }
 
-#define arch_cmpxchg(ptr, o, n)						\
+#define arch_xchg(ptr, x)						\
 ({									\
-	__typeof__(*(ptr)) __ret;					\
-									\
-	__ret = (__typeof__(*(ptr)))					\
-		__cmpxchg((unsigned long)(ptr), (unsigned long)(o),	\
-			  (unsigned long)(n), sizeof(*(ptr)));		\
-	__ret;								\
+	(__typeof__(*(ptr)))__arch_xchg((unsigned long)(ptr),		\
+					(unsigned long)(x),		\
+					sizeof(*(ptr)));		\
 })
 
-#define arch_cmpxchg64		arch_cmpxchg
-#define arch_cmpxchg_local	arch_cmpxchg
-#define arch_cmpxchg64_local	arch_cmpxchg
+#define system_has_cmpxchg128()		1
 
-#define system_has_cmpxchg_double()	1
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
+{
+	asm volatile(
+		"	cdsg	%[old],%[new],%[ptr]\n"
+		: [old] "+d" (old), [ptr] "+QS" (*ptr)
+		: [new] "d" (new)
+		: "memory", "cc");
+	return old;
+}
+
+#define arch_cmpxchg128		arch_cmpxchg128
+#define arch_cmpxchg128_local	arch_cmpxchg128
+
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
 
-static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2,
-					    unsigned long o1, unsigned long o2,
-					    unsigned long n1, unsigned long n2)
+static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
 {
-	union register_pair old = { .even = o1, .odd = o2, };
-	union register_pair new = { .even = n1, .odd = n2, };
 	int cc;
 
 	asm volatile(
 		"	cdsg	%[old],%[new],%[ptr]\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=&d" (cc), [old] "+&d" (old.pair)
-		: [new] "d" (new.pair),
-		  [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2)
-		: "memory", "cc");
-	return !cc;
+		: [old] "+d" (*oldp), [ptr] "+QS" (*ptr), "=@cc" (cc)
+		: [new] "d" (new)
+		: "memory");
+	return likely(cc == 0);
 }
 
-#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2)			\
-({									\
-	typeof(p1) __p1 = (p1);						\
-	typeof(p2) __p2 = (p2);						\
-									\
-	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
-	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
-	VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
-	__cmpxchg_double((unsigned long)__p1, (unsigned long)__p2,	\
-			 (unsigned long)(o1), (unsigned long)(o2),	\
-			 (unsigned long)(n1), (unsigned long)(n2));	\
-})
+#define arch_try_cmpxchg128		arch_try_cmpxchg128
+#define arch_try_cmpxchg128_local	arch_try_cmpxchg128
+
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
 
 #endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index a386070f1d56..3cb9d813f022 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -112,7 +112,7 @@ struct compat_statfs64 {
 	u32		f_namelen;
 	u32		f_frsize;
 	u32		f_flags;
-	u32		f_spare[4];
+	u32		f_spare[5];
 };
 
 /*
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 646b12981f20..54cb97603ec0 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -2,7 +2,7 @@
 /*
  * CP Assist for Cryptographic Functions (CPACF)
  *
- * Copyright IBM Corp. 2003, 2017
+ * Copyright IBM Corp. 2003, 2023
  * Author(s): Thomas Spatzier
  *	      Jan Glauber
  *	      Harald Freudenberger (freude@de.ibm.com)
@@ -12,6 +12,7 @@
 #define _ASM_S390_CPACF_H
 
 #include <asm/facility.h>
+#include <linux/kmsan-checks.h>
 
 /*
  * Instruction opcodes for the CPACF instructions
@@ -53,6 +54,10 @@
 #define CPACF_KM_XTS_256	0x34
 #define CPACF_KM_PXTS_128	0x3a
 #define CPACF_KM_PXTS_256	0x3c
+#define CPACF_KM_XTS_128_FULL	0x52
+#define CPACF_KM_XTS_256_FULL	0x54
+#define CPACF_KM_PXTS_128_FULL	0x5a
+#define CPACF_KM_PXTS_256_FULL	0x5c
 
 /*
  * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
@@ -120,18 +125,31 @@
 #define CPACF_KMAC_DEA		0x01
 #define CPACF_KMAC_TDEA_128	0x02
 #define CPACF_KMAC_TDEA_192	0x03
+#define CPACF_KMAC_HMAC_SHA_224	0x70
+#define CPACF_KMAC_HMAC_SHA_256	0x71
+#define CPACF_KMAC_HMAC_SHA_384	0x72
+#define CPACF_KMAC_HMAC_SHA_512	0x73
 
 /*
  * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)
  * instruction
  */
-#define CPACF_PCKMO_QUERY		0x00
-#define CPACF_PCKMO_ENC_DES_KEY		0x01
-#define CPACF_PCKMO_ENC_TDES_128_KEY	0x02
-#define CPACF_PCKMO_ENC_TDES_192_KEY	0x03
-#define CPACF_PCKMO_ENC_AES_128_KEY	0x12
-#define CPACF_PCKMO_ENC_AES_192_KEY	0x13
-#define CPACF_PCKMO_ENC_AES_256_KEY	0x14
+#define CPACF_PCKMO_QUERY		       0x00
+#define CPACF_PCKMO_ENC_DES_KEY		       0x01
+#define CPACF_PCKMO_ENC_TDES_128_KEY	       0x02
+#define CPACF_PCKMO_ENC_TDES_192_KEY	       0x03
+#define CPACF_PCKMO_ENC_AES_128_KEY	       0x12
+#define CPACF_PCKMO_ENC_AES_192_KEY	       0x13
+#define CPACF_PCKMO_ENC_AES_256_KEY	       0x14
+#define CPACF_PCKMO_ENC_AES_XTS_128_DOUBLE_KEY 0x15
+#define CPACF_PCKMO_ENC_AES_XTS_256_DOUBLE_KEY 0x16
+#define CPACF_PCKMO_ENC_ECC_P256_KEY	       0x20
+#define CPACF_PCKMO_ENC_ECC_P384_KEY	       0x21
+#define CPACF_PCKMO_ENC_ECC_P521_KEY	       0x22
+#define CPACF_PCKMO_ENC_ECC_ED25519_KEY	       0x28
+#define CPACF_PCKMO_ENC_ECC_ED448_KEY	       0x29
+#define CPACF_PCKMO_ENC_HMAC_512_KEY	       0x76
+#define CPACF_PCKMO_ENC_HMAC_1024_KEY	       0x7a
 
 /*
  * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
@@ -159,30 +177,126 @@
 #define CPACF_KMA_LAAD	0x200	/* Last-AAD */
 #define CPACF_KMA_HS	0x400	/* Hash-subkey Supplied */
 
+/*
+ * Flags for the KIMD/KLMD (COMPUTE INTERMEDIATE/LAST MESSAGE DIGEST)
+ * instructions
+ */
+#define CPACF_KIMD_NIP		0x8000
+#define CPACF_KLMD_DUFOP	0x4000
+#define CPACF_KLMD_NIP		0x8000
+
+/*
+ * Function codes for KDSA (COMPUTE DIGITAL SIGNATURE AUTHENTICATION)
+ * instruction
+ */
+#define CPACF_KDSA_QUERY 0x00
+#define CPACF_KDSA_ECDSA_VERIFY_P256 0x01
+#define CPACF_KDSA_ECDSA_VERIFY_P384 0x02
+#define CPACF_KDSA_ECDSA_VERIFY_P521 0x03
+#define CPACF_KDSA_ECDSA_SIGN_P256 0x09
+#define CPACF_KDSA_ECDSA_SIGN_P384 0x0a
+#define CPACF_KDSA_ECDSA_SIGN_P521 0x0b
+#define CPACF_KDSA_ENC_ECDSA_SIGN_P256 0x11
+#define CPACF_KDSA_ENC_ECDSA_SIGN_P384 0x12
+#define CPACF_KDSA_ENC_ECDSA_SIGN_P521 0x13
+#define CPACF_KDSA_EDDSA_VERIFY_ED25519 0x20
+#define CPACF_KDSA_EDDSA_VERIFY_ED448 0x24
+#define CPACF_KDSA_EDDSA_SIGN_ED25519 0x28
+#define CPACF_KDSA_EDDSA_SIGN_ED448 0x2c
+#define CPACF_KDSA_ENC_EDDSA_SIGN_ED25519 0x30
+#define CPACF_KDSA_ENC_EDDSA_SIGN_ED448 0x34
+
+#define CPACF_FC_QUERY 0x00
+#define CPACF_FC_QUERY_AUTH_INFO 0x7F
+
 typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+typedef struct { unsigned char bytes[256]; } cpacf_qai_t;
 
-/**
- * cpacf_query() - check if a specific CPACF function is available
- * @opcode: the opcode of the crypto instruction
- * @func: the function code to test for
- *
- * Executes the query function for the given crypto instruction @opcode
- * and checks if @func is available
- *
- * Returns 1 if @func is available for @opcode, 0 otherwise
+/*
+ * Prototype for a non-existent function, used to produce a link
+ * error if __cpacf_query() or __cpacf_check_opcode() is used
+ * with an invalid compile-time constant opcode.
  */
-static __always_inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+void __cpacf_bad_opcode(void);
+
+static __always_inline void __cpacf_query_rre(u32 opc, u8 r1, u8 r2,
+					      u8 *pb, u8 fc)
 {
 	asm volatile(
-		"	lghi	0,0\n" /* query function */
-		"	lgr	1,%[mask]\n"
-		"	spm	0\n" /* pckmo doesn't change the cc */
-		/* Parameter regs are ignored, but must be nonzero and unique */
-		"0:	.insn	rrf,%[opc] << 16,2,4,6,0\n"
-		"	brc	1,0b\n"	/* handle partial completion */
-		: "=m" (*mask)
-		: [mask] "d" ((unsigned long)mask), [opc] "i" (opcode)
-		: "cc", "0", "1");
+		"	la	%%r1,%[pb]\n"
+		"	lghi	%%r0,%[fc]\n"
+		"	.insn	rre,%[opc] << 16,%[r1],%[r2]\n"
+		: [pb] "=R" (*pb)
+		: [opc] "i" (opc), [fc] "i" (fc),
+		  [r1] "i" (r1), [r2] "i" (r2)
+		: "cc", "memory", "r0", "r1");
+}
+
+static __always_inline void __cpacf_query_rrf(u32 opc, u8 r1, u8 r2, u8 r3,
+					      u8 m4, u8 *pb, u8 fc)
+{
+	asm volatile(
+		"	la	%%r1,%[pb]\n"
+		"	lghi	%%r0,%[fc]\n"
+		"	.insn	rrf,%[opc] << 16,%[r1],%[r2],%[r3],%[m4]\n"
+		: [pb] "=R" (*pb)
+		: [opc] "i" (opc), [fc] "i" (fc), [r1] "i" (r1),
+		  [r2] "i" (r2), [r3] "i" (r3), [m4] "i" (m4)
+		: "cc", "memory", "r0", "r1");
+}
+
+static __always_inline void __cpacf_query_insn(unsigned int opcode, void *pb,
+					       u8 fc)
+{
+	switch (opcode) {
+	case CPACF_KDSA:
+		__cpacf_query_rre(CPACF_KDSA, 0, 2, pb, fc);
+		break;
+	case CPACF_KIMD:
+		__cpacf_query_rre(CPACF_KIMD, 0, 2, pb, fc);
+		break;
+	case CPACF_KLMD:
+		__cpacf_query_rre(CPACF_KLMD, 0, 2, pb, fc);
+		break;
+	case CPACF_KM:
+		__cpacf_query_rre(CPACF_KM, 2, 4, pb, fc);
+		break;
+	case CPACF_KMA:
+		__cpacf_query_rrf(CPACF_KMA, 2, 4, 6, 0, pb, fc);
+		break;
+	case CPACF_KMAC:
+		__cpacf_query_rre(CPACF_KMAC, 0, 2, pb, fc);
+		break;
+	case CPACF_KMC:
+		__cpacf_query_rre(CPACF_KMC, 2, 4, pb, fc);
+		break;
+	case CPACF_KMCTR:
+		__cpacf_query_rrf(CPACF_KMCTR, 2, 4, 6, 0, pb, fc);
+		break;
+	case CPACF_KMF:
+		__cpacf_query_rre(CPACF_KMF, 2, 4, pb, fc);
+		break;
+	case CPACF_KMO:
+		__cpacf_query_rre(CPACF_KMO, 2, 4, pb, fc);
+		break;
+	case CPACF_PCC:
+		__cpacf_query_rre(CPACF_PCC, 0, 0, pb, fc);
+		break;
+	case CPACF_PCKMO:
+		__cpacf_query_rre(CPACF_PCKMO, 0, 0, pb, fc);
+		break;
+	case CPACF_PRNO:
+		__cpacf_query_rre(CPACF_PRNO, 2, 4, pb, fc);
+		break;
+	default:
+		__cpacf_bad_opcode();
+	}
+}
+
+static __always_inline void __cpacf_query(unsigned int opcode,
+					  cpacf_mask_t *mask)
+{
+	__cpacf_query_insn(opcode, mask, CPACF_FC_QUERY);
 }
 
 static __always_inline int __cpacf_check_opcode(unsigned int opcode)
@@ -205,11 +319,25 @@ static __always_inline int __cpacf_check_opcode(unsigned int opcode)
 		return test_facility(57);	/* check for MSA5 */
 	case CPACF_KMA:
 		return test_facility(146);	/* check for MSA8 */
+	case CPACF_KDSA:
+		return test_facility(155);	/* check for MSA9 */
 	default:
-		BUG();
+		__cpacf_bad_opcode();
+		return 0;
 	}
 }
 
+/**
+ * cpacf_query() - Query the function code mask for this CPACF opcode
+ * @opcode: the opcode of the crypto instruction
+ * @mask: ptr to struct cpacf_mask_t
+ *
+ * Checks whether the crypto instruction @opcode is available and, if so,
+ * executes its query function.
+ *
+ * On success 1 is returned and the mask is filled with the function
+ * code mask for this CPACF opcode, otherwise 0 is returned.
+ */
 static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 {
 	if (__cpacf_check_opcode(opcode)) {
@@ -225,7 +353,8 @@ static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
 	return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
 }
 
-static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+static __always_inline int cpacf_query_func(unsigned int opcode,
+					    unsigned int func)
 {
 	cpacf_mask_t mask;
 
@@ -234,6 +363,32 @@ static __always_inline int cpacf_query_func(unsigned int opcode, unsigned int fu
 	return 0;
 }
 
+static __always_inline void __cpacf_qai(unsigned int opcode, cpacf_qai_t *qai)
+{
+	__cpacf_query_insn(opcode, qai, CPACF_FC_QUERY_AUTH_INFO); /* qai is the parameter block */
+}
+
+/**
+ * cpacf_qai() - Get the query authentication information for a CPACF opcode
+ * @opcode: the opcode of the crypto instruction
+ * @qai: ptr to cpacf_qai_t, receives the query authentication information
+ *
+ * Checks whether the query authentication information function is available
+ * for the crypto instruction @opcode and, if so, executes it.
+ *
+ * On success 1 is returned and @qai is filled with the query authentication
+ * information for this CPACF opcode; otherwise @qai is zeroed and 0 returned.
+ */
+static __always_inline int cpacf_qai(unsigned int opcode, cpacf_qai_t *qai)
+{
+	if (!cpacf_query_func(opcode, CPACF_FC_QUERY_AUTH_INFO)) {
+		memset(qai, 0, sizeof(*qai));	/* QAI unavailable: zeroed block */
+		return 0;
+	}
+	__cpacf_qai(opcode, qai);
+	return 1;
+}
+
 /**
  * cpacf_km() - executes the KM (CIPHER MESSAGE) instruction
  * @func: the function code passed to KM; see CPACF_KM_xxx defines
@@ -316,7 +471,7 @@ static inline void cpacf_kimd(unsigned long func, void *param,
 	asm volatile(
 		"	lgr	0,%[fc]\n"
 		"	lgr	1,%[pba]\n"
-		"0:	.insn	rre,%[opc] << 16,0,%[src]\n"
+		"0:	.insn	rrf,%[opc] << 16,0,%[src],8,0\n"
 		"	brc	1,0b\n" /* handle partial completion */
 		: [src] "+&d" (s.pair)
 		: [fc] "d" (func), [pba] "d" ((unsigned long)(param)),
@@ -341,7 +496,7 @@ static inline void cpacf_klmd(unsigned long func, void *param,
 	asm volatile(
 		"	lgr	0,%[fc]\n"
 		"	lgr	1,%[pba]\n"
-		"0:	.insn	rre,%[opc] << 16,0,%[src]\n"
+		"0:	.insn	rrf,%[opc] << 16,0,%[src],8,0\n"
 		"	brc	1,0b\n" /* handle partial completion */
 		: [src] "+&d" (s.pair)
 		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
@@ -350,29 +505,30 @@ static inline void cpacf_klmd(unsigned long func, void *param,
 }
 
 /**
- * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
- *		  instruction
- * @func: the function code passed to KM; see CPACF_KMAC_xxx defines
+ * _cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction and updates flags in gr0
+ * @gr0: pointer to gr0 (fc and flags) passed to KMAC; see CPACF_KMAC_xxx defines
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
  *
  * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kmac(unsigned long func, void *param,
-			     const u8 *src, long src_len)
+static inline int _cpacf_kmac(unsigned long *gr0, void *param,
+			      const u8 *src, long src_len)
 {
 	union register_pair s;
 
 	s.even = (unsigned long)src;
 	s.odd  = (unsigned long)src_len;
 	asm volatile(
-		"	lgr	0,%[fc]\n"
+		"	lgr	0,%[r0]\n"
 		"	lgr	1,%[pba]\n"
 		"0:	.insn	rre,%[opc] << 16,0,%[src]\n"
 		"	brc	1,0b\n" /* handle partial completion */
-		: [src] "+&d" (s.pair)
-		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
+		"	lgr	%[r0],0\n"
+		: [r0] "+d" (*gr0), [src] "+&d" (s.pair)
+		: [pba] "d" ((unsigned long)param),
 		  [opc] "i" (CPACF_KMAC)
 		: "cc", "memory", "0", "1");
 
@@ -380,6 +536,22 @@ static inline int cpacf_kmac(unsigned long func, void *param,
 }
 
 /**
+ * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction
+ * @func: function code passed to KMAC; see CPACF_KMAC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_kmac(unsigned long func, void *param,
+			     const u8 *src, long src_len)
+{
+	return _cpacf_kmac(&func, param, src, src_len); /* gr0 write-back is dropped */
+}
+
+/**
  * cpacf_kmctr() - executes the KMCTR (CIPHER MESSAGE WITH COUNTER) instruction
  * @func: the function code passed to KMCTR; see CPACF_KMCTR_xxx defines
  * @param: address of parameter block; see POP for details on each func
@@ -468,6 +640,8 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
 		: [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair)
 		: [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO)
 		: "cc", "memory", "0");
+	kmsan_unpoison_memory(ucbuf, ucbuf_len);
+	kmsan_unpoison_memory(cbuf, cbuf_len);
 }
 
 /**
@@ -475,18 +649,30 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
  *		 instruction
  * @func: the function code passed to PCC; see CPACF_KM_xxx defines
  * @param: address of parameter block; see POP for details on each func
+ *
+ * Returns the condition code, this is
+ * 0 - cc code 0 (normal completion)
+ * 1 - cc code 1 (protected key wkvp mismatch or src operand out of range)
+ * 2 - cc code 2 (something invalid, scalar multiply infinity, ...)
+ * Condition code 3 (partial completion) is handled within the asm code
+ * and never returned.
  */
-static inline void cpacf_pcc(unsigned long func, void *param)
+static inline int cpacf_pcc(unsigned long func, void *param)
 {
+	int cc;
+
 	asm volatile(
 		"	lgr	0,%[fc]\n"
 		"	lgr	1,%[pba]\n"
 		"0:	.insn	rre,%[opc] << 16,0,0\n" /* PCC opcode */
 		"	brc	1,0b\n" /* handle partial completion */
-		:
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
 		: [fc] "d" (func), [pba] "d" ((unsigned long)param),
 		  [opc] "i" (CPACF_PCC)
-		: "cc", "memory", "0", "1");
+		: CC_CLOBBER_LIST("memory", "0", "1"));
+
+	return CC_TRANSFORM(cc);
 }
 
 /**
diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h
deleted file mode 100644
index f87a4788c19c..000000000000
--- a/arch/s390/include/asm/cpu_mcf.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Counter facility support definitions for the Linux perf
- *
- * Copyright IBM Corp. 2019
- * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
- */
-#ifndef _ASM_S390_CPU_MCF_H
-#define _ASM_S390_CPU_MCF_H
-
-#include <linux/perf_event.h>
-#include <asm/cpu_mf.h>
-
-enum cpumf_ctr_set {
-	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
-	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
-	CPUMF_CTR_SET_CRYPTO  = 2,    /* Crypto-Activity Counter Set */
-	CPUMF_CTR_SET_EXT     = 3,    /* Extended Counter Set */
-	CPUMF_CTR_SET_MT_DIAG = 4,    /* MT-diagnostic Counter Set */
-
-	/* Maximum number of counter sets */
-	CPUMF_CTR_SET_MAX,
-};
-
-#define CPUMF_LCCTL_ENABLE_SHIFT    16
-#define CPUMF_LCCTL_ACTCTL_SHIFT     0
-
-static inline void ctr_set_enable(u64 *state, u64 ctrsets)
-{
-	*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
-}
-
-static inline void ctr_set_disable(u64 *state, u64 ctrsets)
-{
-	*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
-}
-
-static inline void ctr_set_start(u64 *state, u64 ctrsets)
-{
-	*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
-}
-
-static inline void ctr_set_stop(u64 *state, u64 ctrsets)
-{
-	*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
-}
-
-static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
-{
-	switch (set) {
-	case CPUMF_CTR_SET_BASIC:
-		return stcctm(BASIC, range, dest);
-	case CPUMF_CTR_SET_USER:
-		return stcctm(PROBLEM_STATE, range, dest);
-	case CPUMF_CTR_SET_CRYPTO:
-		return stcctm(CRYPTO_ACTIVITY, range, dest);
-	case CPUMF_CTR_SET_EXT:
-		return stcctm(EXTENDED, range, dest);
-	case CPUMF_CTR_SET_MT_DIAG:
-		return stcctm(MT_DIAG_CLEARING, range, dest);
-	case CPUMF_CTR_SET_MAX:
-		return 3;
-	}
-	return 3;
-}
-
-struct cpu_cf_events {
-	struct cpumf_ctr_info	info;
-	atomic_t		ctr_set[CPUMF_CTR_SET_MAX];
-	atomic64_t		alert;
-	u64			state;		/* For perf_event_open SVC */
-	u64			dev_state;	/* For /dev/hwctr */
-	unsigned int		flags;
-	size_t used;			/* Bytes used in data */
-	size_t usedss;			/* Bytes used in start/stop */
-	unsigned char start[PAGE_SIZE];	/* Counter set at event add */
-	unsigned char stop[PAGE_SIZE];	/* Counter set at event delete */
-	unsigned char data[PAGE_SIZE];	/* Counter set at /dev/hwctr */
-	unsigned int sets;		/* # Counter set saved in memory */
-};
-DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
-bool kernel_cpumcf_avail(void);
-int __kernel_cpumcf_begin(void);
-unsigned long kernel_cpumcf_alert(int clear);
-void __kernel_cpumcf_end(void);
-
-static inline int kernel_cpumcf_begin(void)
-{
-	if (!cpum_cf_avail())
-		return -ENODEV;
-
-	preempt_disable();
-	return __kernel_cpumcf_begin();
-}
-static inline void kernel_cpumcf_end(void)
-{
-	__kernel_cpumcf_end();
-	preempt_enable();
-}
-
-/* Return true if store counter set multiple instruction is available */
-static inline int stccm_avail(void)
-{
-	return test_facility(142);
-}
-
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
-			   struct cpumf_ctr_info *info);
-int cfset_online_cpu(unsigned int cpu);
-int cfset_offline_cpu(unsigned int cpu);
-#endif /* _ASM_S390_CPU_MCF_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index feaba12dbecb..1798fbd59068 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -10,8 +10,10 @@
 #define _ASM_S390_CPU_MF_H
 
 #include <linux/errno.h>
+#include <linux/kmsan-checks.h>
 #include <asm/asm-extable.h>
 #include <asm/facility.h>
+#include <asm/asm.h>
 
 asm(".include \"asm/cpu_mf-insn.h\"\n");
 
@@ -42,7 +44,6 @@ static inline int cpum_sf_avail(void)
 	return test_facility(40) && test_facility(68);
 }
 
-
 struct cpumf_ctr_info {
 	u16   cfvn;
 	u16   auth_ctl;
@@ -131,19 +132,21 @@ struct hws_combined_entry {
 	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
 } __packed;
 
-struct hws_trailer_entry {
-	union {
-		struct {
-			unsigned int f:1;	/* 0 - Block Full Indicator   */
-			unsigned int a:1;	/* 1 - Alert request control  */
-			unsigned int t:1;	/* 2 - Timestamp format	      */
-			unsigned int :29;	/* 3 - 31: Reserved	      */
-			unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
-			unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
-		};
-		unsigned long long flags;	/* 0 - 63: All indicators     */
+union hws_trailer_header {
+	struct {
+		unsigned int f:1;	/* 0 - Block Full Indicator   */
+		unsigned int a:1;	/* 1 - Alert request control  */
+		unsigned int t:1;	/* 2 - Timestamp format	      */
+		unsigned int :29;	/* 3 - 31: Reserved	      */
+		unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+		unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
+		unsigned long long overflow; /* 64 - 127: Overflow Count */
 	};
-	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	u128 val;
+};
+
+struct hws_trailer_entry {
+	union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count     */
 	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
 	unsigned long long reserved1;	 /* 32 -Reserved		      */
 	unsigned long long reserved2;	 /*				      */
@@ -168,7 +171,7 @@ static inline int qctri(struct cpumf_ctr_info *info)
 {
 	int rc = -EINVAL;
 
-	asm volatile (
+	asm_inline volatile (
 		"0:	qctri	%1\n"
 		"1:	lhi	%0,0\n"
 		"2:\n"
@@ -182,12 +185,13 @@ static inline int lcctl(u64 ctl)
 {
 	int cc;
 
-	asm volatile (
-		"	lcctl	%1\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (cc) : "Q" (ctl) : "cc");
-	return cc;
+	asm_inline volatile (
+		"	lcctl	%[ctl]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		: [ctl] "Q" (ctl)
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
 }
 
 /* Extract CPU counter */
@@ -196,13 +200,14 @@ static inline int __ecctr(u64 ctr, u64 *content)
 	u64 _content;
 	int cc;
 
-	asm volatile (
-		"	ecctr	%0,%2\n"
-		"	ipm	%1\n"
-		"	srl	%1,28\n"
-		: "=d" (_content), "=d" (cc) : "d" (ctr) : "cc");
+	asm_inline volatile (
+		"	ecctr	%[_content],%[ctr]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [_content] "=d" (_content)
+		: [ctr] "d" (ctr)
+		: CC_CLOBBER);
 	*content = _content;
-	return cc;
+	return CC_TRANSFORM(cc);
 }
 
 /* Extract CPU counter */
@@ -232,13 +237,17 @@ static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest)
 	int cc;
 
 	asm volatile (
-		"	STCCTM	%2,%3,%1\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (cc)
-		: "Q" (*dest), "d" (range), "i" (set)
-		: "cc", "memory");
-	return cc;
+		"	STCCTM	%[range],%[set],%[dest]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		: [dest] "Q" (*dest), [range] "d" (range), [set] "i" (set)
+		: CC_CLOBBER_LIST("memory"));
+	/*
+	 * If cc == 2, less than RANGE counters are stored, but it's not easy
+	 * to tell how many. Always unpoison the whole range for simplicity.
+	 */
+	kmsan_unpoison_memory(dest, range * sizeof(u64));
+	return CC_TRANSFORM(cc);
 }
 
 /* Query sampling information */
@@ -258,74 +267,20 @@ static inline int qsi(struct hws_qsi_info_block *info)
 /* Load sampling controls */
 static inline int lsctl(struct hws_lsctl_request_block *req)
 {
-	int cc;
+	int cc, exception;
 
-	cc = 1;
+	exception = 1;
 	asm volatile(
-		"0:	lsctl	0(%1)\n"
-		"1:	ipm	%0\n"
-		"	srl	%0,28\n"
+		"0:	lsctl	%[req]\n"
+		"1:	lhi	%[exc],0\n"
 		"2:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: "+d" (cc), "+a" (req)
-		: "m" (*req)
-		: "cc", "memory");
-
-	return cc ? -EINVAL : 0;
-}
-
-/* Sampling control helper functions */
-
-#include <linux/time.h>
-
-static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
-						unsigned long freq)
-{
-	return (USEC_PER_SEC / freq) * qsi->cpu_speed;
-}
-
-static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
-						unsigned long rate)
-{
-	return USEC_PER_SEC * qsi->cpu_speed / rate;
-}
-
-#define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
-#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
-
-/* Return TOD timestamp contained in an trailer entry */
-static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
-{
-	/* TOD in STCKE format */
-	if (te->t)
-		return *((unsigned long long *) &te->timestamp[1]);
-
-	/* TOD in STCK format */
-	return *((unsigned long long *) &te->timestamp[0]);
-}
-
-/* Return pointer to trailer entry of an sample data block */
-static inline unsigned long *trailer_entry_ptr(unsigned long v)
-{
-	void *ret;
-
-	ret = (void *) v;
-	ret += PAGE_SIZE;
-	ret -= sizeof(struct hws_trailer_entry);
-
-	return (unsigned long *) ret;
-}
-
-/* Return true if the entry in the sample data block table (sdbt)
- * is a link to the next sdbt */
-static inline int is_link_entry(unsigned long *s)
-{
-	return *s & 0x1ul ? 1 : 0;
-}
-
-/* Return pointer to the linked sdbt */
-static inline unsigned long *get_next_sdbt(unsigned long *s)
-{
-	return (unsigned long *) (*s & ~0x1ul);
+		: CC_OUT(cc, cc), [exc] "+d" (exception)
+		: [req] "Q" (*req)
+		: CC_CLOBBER);
+	if (exception || CC_TRANSFORM(cc))
+		return -EINVAL;
+	return 0;
 }
 #endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 14cfd48d598e..6c6a99660e78 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -2,29 +2,37 @@
 /*
  * Module interface for CPU features
  *
- * Copyright IBM Corp. 2015
+ * Copyright IBM Corp. 2015, 2022
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
 #ifndef __ASM_S390_CPUFEATURE_H
 #define __ASM_S390_CPUFEATURE_H
 
-#include <asm/elf.h>
+#include <asm/facility.h>
 
-/* Hardware features on Linux on z Systems are indicated by facility bits that
- * are mapped to the so-called machine flags.  Particular machine flags are
- * then used to define ELF hardware capabilities; most notably hardware flags
- * that are essential for user space / glibc.
- *
- * Restrict the set of exposed CPU features to ELF hardware capabilities for
- * now.  Additional machine flags can be indicated by values larger than
- * MAX_ELF_HWCAP_FEATURES.
- */
-#define MAX_ELF_HWCAP_FEATURES	(8 * sizeof(elf_hwcap))
-#define MAX_CPU_FEATURES	MAX_ELF_HWCAP_FEATURES
+enum {
+	S390_CPU_FEATURE_MSA,
+	S390_CPU_FEATURE_VXRS,
+	S390_CPU_FEATURE_UV,
+	S390_CPU_FEATURE_D288,
+	MAX_CPU_FEATURES
+};
 
-#define cpu_feature(feat)	ilog2(HWCAP_ ## feat)
+#define cpu_feature(feature)	(feature)
 
 int cpu_have_feature(unsigned int nr);
 
+#define cpu_has_bear()		test_facility(193)
+#define cpu_has_edat1()		test_facility(8)
+#define cpu_has_edat2()		test_facility(78)
+#define cpu_has_gs()		test_facility(133)
+#define cpu_has_idte()		test_facility(3)
+#define cpu_has_nx()		test_facility(130)
+#define cpu_has_rdp()		test_facility(194)
+#define cpu_has_seq_insn()	test_facility(85)
+#define cpu_has_tlb_lc()	test_facility(51)
+#define cpu_has_topology()	test_facility(11)
+#define cpu_has_vx()		test_facility(129)
+
 #endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 1d389847b588..30bb3ec4e5fc 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,30 +11,11 @@
 #include <linux/types.h>
 #include <asm/timex.h>
 
-#define CPUTIME_PER_USEC 4096ULL
-#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
-
-/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-
-/*
- * Convert cputime to microseconds.
- */
-static inline u64 cputime_to_usecs(const u64 cputime)
-{
-	return cputime >> 12;
-}
-
 /*
  * Convert cputime to nanoseconds.
  */
 #define cputime_to_nsecs(cputime) tod_to_ns(cputime)
 
-u64 arch_cpu_idle_time(int cpu);
-
-#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
-
 void account_idle_time_irq(void);
 
 #endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 638137d46c85..a03f64033760 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -25,7 +25,7 @@ struct css_general_char {
 	u64 : 2;
 
 	u64 : 3;
-	u64 aif_osa : 1; /* bit 67 */
+	u64 aif_qdio : 1;/* bit 67 */
 	u64 : 12;
 	u64 eadm_rf : 1; /* bit 80 */
 	u64 : 1;
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
deleted file mode 100644
index 267a8f88e143..000000000000
--- a/arch/s390/include/asm/ctl_reg.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 1999, 2009
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef __ASM_CTL_REG_H
-#define __ASM_CTL_REG_H
-
-#include <linux/bits.h>
-
-#define CR0_CLOCK_COMPARATOR_SIGN	BIT(63 - 10)
-#define CR0_LOW_ADDRESS_PROTECTION	BIT(63 - 35)
-#define CR0_FETCH_PROTECTION_OVERRIDE	BIT(63 - 38)
-#define CR0_STORAGE_PROTECTION_OVERRIDE	BIT(63 - 39)
-#define CR0_EMERGENCY_SIGNAL_SUBMASK	BIT(63 - 49)
-#define CR0_EXTERNAL_CALL_SUBMASK	BIT(63 - 50)
-#define CR0_CLOCK_COMPARATOR_SUBMASK	BIT(63 - 52)
-#define CR0_CPU_TIMER_SUBMASK		BIT(63 - 53)
-#define CR0_SERVICE_SIGNAL_SUBMASK	BIT(63 - 54)
-#define CR0_UNUSED_56			BIT(63 - 56)
-#define CR0_INTERRUPT_KEY_SUBMASK	BIT(63 - 57)
-#define CR0_MEASUREMENT_ALERT_SUBMASK	BIT(63 - 58)
-
-#define CR14_UNUSED_32			BIT(63 - 32)
-#define CR14_UNUSED_33			BIT(63 - 33)
-#define CR14_CHANNEL_REPORT_SUBMASK	BIT(63 - 35)
-#define CR14_RECOVERY_SUBMASK		BIT(63 - 36)
-#define CR14_DEGRADATION_SUBMASK	BIT(63 - 37)
-#define CR14_EXTERNAL_DAMAGE_SUBMASK	BIT(63 - 38)
-#define CR14_WARNING_SUBMASK		BIT(63 - 39)
-
-#ifndef __ASSEMBLY__
-
-#include <linux/bug.h>
-
-#define __ctl_load(array, low, high) do {				\
-	typedef struct { char _[sizeof(array)]; } addrtype;		\
-									\
-	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
-	asm volatile(							\
-		"	lctlg	%1,%2,%0\n"				\
-		:							\
-		: "Q" (*(addrtype *)(&array)), "i" (low), "i" (high)	\
-		: "memory");						\
-} while (0)
-
-#define __ctl_store(array, low, high) do {				\
-	typedef struct { char _[sizeof(array)]; } addrtype;		\
-									\
-	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
-	asm volatile(							\
-		"	stctg	%1,%2,%0\n"				\
-		: "=Q" (*(addrtype *)(&array))				\
-		: "i" (low), "i" (high));				\
-} while (0)
-
-static __always_inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
-{
-	unsigned long reg;
-
-	__ctl_store(reg, cr, cr);
-	reg |= 1UL << bit;
-	__ctl_load(reg, cr, cr);
-}
-
-static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
-{
-	unsigned long reg;
-
-	__ctl_store(reg, cr, cr);
-	reg &= ~(1UL << bit);
-	__ctl_load(reg, cr, cr);
-}
-
-void smp_ctl_set_clear_bit(int cr, int bit, bool set);
-
-static inline void ctl_set_bit(int cr, int bit)
-{
-	smp_ctl_set_clear_bit(cr, bit, true);
-}
-
-static inline void ctl_clear_bit(int cr, int bit)
-{
-	smp_ctl_set_clear_bit(cr, bit, false);
-}
-
-union ctlreg0 {
-	unsigned long val;
-	struct {
-		unsigned long	   : 8;
-		unsigned long tcx  : 1;	/* Transactional-Execution control */
-		unsigned long pifo : 1;	/* Transactional-Execution Program-
-					   Interruption-Filtering Override */
-		unsigned long	   : 3;
-		unsigned long ccc  : 1; /* Cryptography counter control */
-		unsigned long	   : 18;
-		unsigned long	   : 3;
-		unsigned long lap  : 1; /* Low-address-protection control */
-		unsigned long	   : 4;
-		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
-		unsigned long	   : 2;
-		unsigned long iep  : 1; /* Instruction-Execution-Protection */
-		unsigned long	   : 1;
-		unsigned long afp  : 1; /* AFP-register control */
-		unsigned long vx   : 1; /* Vector enablement control */
-		unsigned long	   : 7;
-		unsigned long sssm : 1; /* Service signal subclass mask */
-		unsigned long	   : 9;
-	};
-};
-
-union ctlreg2 {
-	unsigned long val;
-	struct {
-		unsigned long	    : 33;
-		unsigned long ducto : 25;
-		unsigned long	    : 1;
-		unsigned long gse   : 1;
-		unsigned long	    : 1;
-		unsigned long tds   : 1;
-		unsigned long tdc   : 2;
-	};
-};
-
-union ctlreg5 {
-	unsigned long val;
-	struct {
-		unsigned long	    : 33;
-		unsigned long pasteo: 25;
-		unsigned long	    : 6;
-	};
-};
-
-union ctlreg15 {
-	unsigned long val;
-	struct {
-		unsigned long lsea  : 61;
-		unsigned long	    : 3;
-	};
-};
-
-#endif /* __ASSEMBLY__ */
-#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/ctlreg.h b/arch/s390/include/asm/ctlreg.h
new file mode 100644
index 000000000000..e6527f51ad0b
--- /dev/null
+++ b/arch/s390/include/asm/ctlreg.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_S390_CTLREG_H
+#define __ASM_S390_CTLREG_H
+
+#include <linux/bits.h>
+
+#define CR0_TRANSACTIONAL_EXECUTION_BIT		(63 - 8)
+#define CR0_CLOCK_COMPARATOR_SIGN_BIT		(63 - 10)
+#define CR0_CRYPTOGRAPHY_COUNTER_BIT		(63 - 13)
+#define CR0_PAI_EXTENSION_BIT			(63 - 14)
+#define CR0_CPUMF_EXTRACTION_AUTH_BIT		(63 - 15)
+#define CR0_WARNING_TRACK_BIT			(63 - 30)
+#define CR0_LOW_ADDRESS_PROTECTION_BIT		(63 - 35)
+#define CR0_FETCH_PROTECTION_OVERRIDE_BIT	(63 - 38)
+#define CR0_STORAGE_PROTECTION_OVERRIDE_BIT	(63 - 39)
+#define CR0_EDAT_BIT				(63 - 40)
+#define CR0_INSTRUCTION_EXEC_PROTECTION_BIT	(63 - 43)
+#define CR0_VECTOR_BIT				(63 - 46)
+#define CR0_MALFUNCTION_ALERT_SUBMASK_BIT	(63 - 48)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK_BIT	(63 - 49)
+#define CR0_EXTERNAL_CALL_SUBMASK_BIT		(63 - 50)
+#define CR0_CLOCK_COMPARATOR_SUBMASK_BIT	(63 - 52)
+#define CR0_CPU_TIMER_SUBMASK_BIT		(63 - 53)
+#define CR0_SERVICE_SIGNAL_SUBMASK_BIT		(63 - 54)
+#define CR0_UNUSED_56_BIT			(63 - 56)
+#define CR0_INTERRUPT_KEY_SUBMASK_BIT		(63 - 57)
+#define CR0_MEASUREMENT_ALERT_SUBMASK_BIT	(63 - 58)
+#define CR0_ETR_SUBMASK_BIT			(63 - 59)
+#define CR0_IUCV_BIT				(63 - 62)
+
+#define CR0_TRANSACTIONAL_EXECUTION		BIT(CR0_TRANSACTIONAL_EXECUTION_BIT)
+#define CR0_CLOCK_COMPARATOR_SIGN		BIT(CR0_CLOCK_COMPARATOR_SIGN_BIT)
+#define CR0_CRYPTOGRAPHY_COUNTER		BIT(CR0_CRYPTOGRAPHY_COUNTER_BIT)
+#define CR0_PAI_EXTENSION			BIT(CR0_PAI_EXTENSION_BIT)
+#define CR0_CPUMF_EXTRACTION_AUTH		BIT(CR0_CPUMF_EXTRACTION_AUTH_BIT)
+#define CR0_WARNING_TRACK			BIT(CR0_WARNING_TRACK_BIT)
+#define CR0_LOW_ADDRESS_PROTECTION		BIT(CR0_LOW_ADDRESS_PROTECTION_BIT)
+#define CR0_FETCH_PROTECTION_OVERRIDE		BIT(CR0_FETCH_PROTECTION_OVERRIDE_BIT)
+#define CR0_STORAGE_PROTECTION_OVERRIDE		BIT(CR0_STORAGE_PROTECTION_OVERRIDE_BIT)
+#define CR0_EDAT				BIT(CR0_EDAT_BIT)
+#define CR0_INSTRUCTION_EXEC_PROTECTION		BIT(CR0_INSTRUCTION_EXEC_PROTECTION_BIT)
+#define CR0_VECTOR				BIT(CR0_VECTOR_BIT)
+#define CR0_MALFUNCTION_ALERT_SUBMASK		BIT(CR0_MALFUNCTION_ALERT_SUBMASK_BIT)
+#define CR0_EMERGENCY_SIGNAL_SUBMASK		BIT(CR0_EMERGENCY_SIGNAL_SUBMASK_BIT)
+#define CR0_EXTERNAL_CALL_SUBMASK		BIT(CR0_EXTERNAL_CALL_SUBMASK_BIT)
+#define CR0_CLOCK_COMPARATOR_SUBMASK		BIT(CR0_CLOCK_COMPARATOR_SUBMASK_BIT)
+#define CR0_CPU_TIMER_SUBMASK			BIT(CR0_CPU_TIMER_SUBMASK_BIT)
+#define CR0_SERVICE_SIGNAL_SUBMASK		BIT(CR0_SERVICE_SIGNAL_SUBMASK_BIT)
+#define CR0_UNUSED_56				BIT(CR0_UNUSED_56_BIT)
+#define CR0_INTERRUPT_KEY_SUBMASK		BIT(CR0_INTERRUPT_KEY_SUBMASK_BIT)
+#define CR0_MEASUREMENT_ALERT_SUBMASK		BIT(CR0_MEASUREMENT_ALERT_SUBMASK_BIT)
+#define CR0_ETR_SUBMASK				BIT(CR0_ETR_SUBMASK_BIT)
+#define CR0_IUCV				BIT(CR0_IUCV_BIT)
+
+#define CR2_MIO_ADDRESSING_BIT			(63 - 58)
+#define CR2_GUARDED_STORAGE_BIT			(63 - 59)
+
+#define CR2_MIO_ADDRESSING			BIT(CR2_MIO_ADDRESSING_BIT)
+#define CR2_GUARDED_STORAGE			BIT(CR2_GUARDED_STORAGE_BIT)
+
+#define CR14_UNUSED_32_BIT			(63 - 32)
+#define CR14_UNUSED_33_BIT			(63 - 33)
+#define CR14_CHANNEL_REPORT_SUBMASK_BIT		(63 - 35)
+#define CR14_RECOVERY_SUBMASK_BIT		(63 - 36)
+#define CR14_DEGRADATION_SUBMASK_BIT		(63 - 37)
+#define CR14_EXTERNAL_DAMAGE_SUBMASK_BIT	(63 - 38)
+#define CR14_WARNING_SUBMASK_BIT		(63 - 39)
+
+#define CR14_UNUSED_32				BIT(CR14_UNUSED_32_BIT)
+#define CR14_UNUSED_33				BIT(CR14_UNUSED_33_BIT)
+#define CR14_CHANNEL_REPORT_SUBMASK		BIT(CR14_CHANNEL_REPORT_SUBMASK_BIT)
+#define CR14_RECOVERY_SUBMASK			BIT(CR14_RECOVERY_SUBMASK_BIT)
+#define CR14_DEGRADATION_SUBMASK		BIT(CR14_DEGRADATION_SUBMASK_BIT)
+#define CR14_EXTERNAL_DAMAGE_SUBMASK		BIT(CR14_EXTERNAL_DAMAGE_SUBMASK_BIT)
+#define CR14_WARNING_SUBMASK			BIT(CR14_WARNING_SUBMASK_BIT)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bug.h>
+
+struct ctlreg {
+	unsigned long val;
+};
+
+#define __local_ctl_load(low, high, array) do {				\
+	struct addrtype {						\
+		char _[sizeof(array)];					\
+	};								\
+	int _high = high;						\
+	int _low = low;							\
+	int _esize;							\
+	/* array must cover exactly registers low..high */		\
+	_esize = (_high - _low + 1) * sizeof(struct ctlreg);		\
+	BUILD_BUG_ON(sizeof(struct addrtype) != _esize);		\
+	typecheck(struct ctlreg, array[0]);				\
+	asm volatile(							\
+		"	lctlg	%[_low],%[_high],%[_arr]\n"		\
+		:							\
+		: [_arr] "Q" (*(struct addrtype *)(&array)),		\
+		  [_low] "i" (low), [_high] "i" (high)			\
+		: "memory");						\
+} while (0)
+
+#define __local_ctl_store(low, high, array) do {			\
+	struct addrtype {						\
+		char _[sizeof(array)];					\
+	};								\
+	int _high = high;						\
+	int _low = low;							\
+	int _esize;							\
+	/* array must cover exactly registers low..high */		\
+	_esize = (_high - _low + 1) * sizeof(struct ctlreg);		\
+	BUILD_BUG_ON(sizeof(struct addrtype) != _esize);		\
+	typecheck(struct ctlreg, array[0]);				\
+	asm volatile(							\
+		"	stctg	%[_low],%[_high],%[_arr]\n"		\
+		: [_arr] "=Q" (*(struct addrtype *)(&array))		\
+		: [_low] "i" (low), [_high] "i" (high));		\
+} while (0)
+
+static __always_inline void local_ctl_load(unsigned int cr, struct ctlreg *reg)
+{
+	asm volatile(
+		"	lctlg	%[cr],%[cr],%[reg]\n" /* first == last: only register cr */
+		:
+		: [reg] "Q" (*reg), [cr] "i" (cr)
+		: "memory");
+}
+
+static __always_inline void local_ctl_store(unsigned int cr, struct ctlreg *reg)
+{
+	asm volatile(
+		"	stctg	%[cr],%[cr],%[reg]\n" /* first == last: only register cr */
+		: [reg] "=Q" (*reg)
+		: [cr] "i" (cr));
+}
+
+static __always_inline struct ctlreg local_ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+	struct ctlreg prev, next;
+
+	local_ctl_store(cr, &prev);	/* snapshot the current contents */
+	next.val = prev.val | (1UL << bit);
+	local_ctl_load(cr, &next);
+	/* hand the pre-modification value back to the caller */
+	return prev;
+}
+
+static __always_inline struct ctlreg local_ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+	struct ctlreg prev, next;
+
+	local_ctl_store(cr, &prev);	/* snapshot the current contents */
+	next.val = prev.val & ~(1UL << bit);
+	local_ctl_load(cr, &next);
+	/* hand the pre-modification value back to the caller */
+	return prev;
+}
+
+struct lowcore;
+
+void system_ctlreg_lock(void);
+void system_ctlreg_unlock(void);
+void system_ctlreg_init_save_area(struct lowcore *lc);
+void system_ctlreg_modify(unsigned int cr, unsigned long data, int request);
+
+enum {
+	CTLREG_SET_BIT,
+	CTLREG_CLEAR_BIT,
+	CTLREG_LOAD,
+};
+
+static inline void system_ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+	system_ctlreg_modify(cr, bit, CTLREG_SET_BIT);	/* data argument = bit number */
+}
+
+static inline void system_ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+	system_ctlreg_modify(cr, bit, CTLREG_CLEAR_BIT);	/* data argument = bit number */
+}
+
+static inline void system_ctl_load(unsigned int cr, struct ctlreg *reg)
+{
+	system_ctlreg_modify(cr, reg->val, CTLREG_LOAD);	/* data argument = full register value */
+}
+
+union ctlreg0 {
+	unsigned long val;
+	struct ctlreg reg;
+	struct {
+		unsigned long	   : 8;
+		unsigned long tcx  : 1;	/* Transactional-Execution control */
+		unsigned long pifo : 1;	/* Transactional-Execution Program-
+					   Interruption-Filtering Override */
+		unsigned long	   : 3;
+		unsigned long ccc  : 1; /* Cryptography counter control */
+		unsigned long pec  : 1; /* PAI extension control */
+		unsigned long	   : 15;
+		unsigned long wti  : 1; /* Warning-track */
+		unsigned long	   : 4;
+		unsigned long lap  : 1; /* Low-address-protection control */
+		unsigned long	   : 4;
+		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+		unsigned long	   : 2;
+		unsigned long iep  : 1; /* Instruction-Execution-Protection */
+		unsigned long	   : 1;
+		unsigned long afp  : 1; /* AFP-register control */
+		unsigned long vx   : 1; /* Vector enablement control */
+		unsigned long	   : 7;
+		unsigned long sssm : 1; /* Service signal subclass mask */
+		unsigned long	   : 9;
+	};
+};
+
+union ctlreg2 {
+	unsigned long val;
+	struct ctlreg reg;
+	struct {
+		unsigned long	    : 33;
+		unsigned long ducto : 25;
+		unsigned long	    : 1;
+		unsigned long gse   : 1;
+		unsigned long	    : 1;
+		unsigned long tds   : 1;
+		unsigned long tdc   : 2;
+	};
+};
+
+union ctlreg5 {
+	unsigned long val;
+	struct ctlreg reg;
+	struct {
+		unsigned long	    : 33;
+		unsigned long pasteo: 25;
+		unsigned long	    : 6;
+	};
+};
+
+union ctlreg15 {
+	unsigned long val;
+	struct ctlreg reg;
+	struct {
+		unsigned long lsea  : 61;
+		unsigned long	    : 3;
+	};
+};
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_CTLREG_H */
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
index 68f84315277c..f9529f7cf62c 100644
--- a/arch/s390/include/asm/current.h
+++ b/arch/s390/include/asm/current.h
@@ -11,9 +11,25 @@
 #define _S390_CURRENT_H
 
 #include <asm/lowcore.h>
+#include <asm/machine.h>
 
 struct task_struct;
 
-#define current ((struct task_struct *const)S390_lowcore.current_task)
+static __always_inline struct task_struct *get_current(void)
+{
+	unsigned long ptr, lc_current;
+
+	lc_current = offsetof(struct lowcore, current_task); /* field offset in struct lowcore */
+	asm_inline(
+		ALTERNATIVE("	lg	%[ptr],%[offzero](%%r0)\n", /* default load */
+			    "	lg	%[ptr],%[offalt](%%r0)\n", /* MFEATURE_LOWCORE variant */
+			    ALT_FEATURE(MFEATURE_LOWCORE))
+		: [ptr] "=d" (ptr)
+		: [offzero] "i" (lc_current), /* plain field offset */
+		  [offalt] "i" (lc_current + LOWCORE_ALT_ADDRESS)); /* offset plus LOWCORE_ALT_ADDRESS */
+	return (struct task_struct *)ptr;
+}
+
+#define current get_current()
 
 #endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/dat-bits.h b/arch/s390/include/asm/dat-bits.h
new file mode 100644
index 000000000000..8d65eec2f124
--- /dev/null
+++ b/arch/s390/include/asm/dat-bits.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAT table and related structures
+ *
+ * Copyright IBM Corp. 2024
+ *
+ */
+
+#ifndef _S390_DAT_BITS_H
+#define _S390_DAT_BITS_H
+
+union asce {
+	unsigned long val;
+	struct {
+		unsigned long rsto: 52;/* Region- or Segment-Table Origin */
+		unsigned long	  : 2;
+		unsigned long g   : 1; /* Subspace Group control */
+		unsigned long p   : 1; /* Private Space control */
+		unsigned long s   : 1; /* Storage-Alteration-Event control */
+		unsigned long x   : 1; /* Space-Switch-Event control */
+		unsigned long r   : 1; /* Real-Space control */
+		unsigned long	  : 1;
+		unsigned long dt  : 2; /* Designation-Type control */
+		unsigned long tl  : 2; /* Region- or Segment-Table Length */
+	};
+};
+
+enum {
+	ASCE_TYPE_SEGMENT = 0,
+	ASCE_TYPE_REGION3 = 1,
+	ASCE_TYPE_REGION2 = 2,
+	ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Second-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Second-Table Length */
+	};
+};
+
+union region2_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Third-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Third-Table Length */
+	};
+};
+
+struct region3_table_entry_fc0 {
+	unsigned long sto: 52;/* Segment-Table Origin */
+	unsigned long	 : 1;
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 1;
+	unsigned long tf : 2; /* Segment-Table Offset */
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+	unsigned long rfaa: 33;/* Region-Frame Absolute Address */
+	unsigned long	  : 14;
+	unsigned long av  : 1; /* ACCF-Validity Control */
+	unsigned long acc : 4; /* Access-Control Bits */
+	unsigned long f   : 1; /* Fetch-Protection Bit */
+	unsigned long fc  : 1; /* Format-Control */
+	unsigned long p   : 1; /* DAT-Protection Bit */
+	unsigned long iep : 1; /* Instruction-Execution-Protection */
+	unsigned long	  : 2;
+	unsigned long i   : 1; /* Region-Invalid Bit */
+	unsigned long cr  : 1; /* Common-Region Bit */
+	unsigned long tt  : 2; /* Table-Type Bits */
+	unsigned long	  : 2;
+};
+
+union region3_table_entry {
+	unsigned long val;
+	struct region3_table_entry_fc0 fc0;
+	struct region3_table_entry_fc1 fc1;
+	struct {
+		unsigned long	: 53;
+		unsigned long fc: 1; /* Format-Control */
+		unsigned long	: 4;
+		unsigned long i : 1; /* Region-Invalid Bit */
+		unsigned long cr: 1; /* Common-Region Bit */
+		unsigned long tt: 2; /* Table-Type Bits */
+		unsigned long	: 2;
+	};
+};
+
+struct segment_table_entry_fc0 {
+	unsigned long pto: 53;/* Page-Table Origin */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 3;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+struct segment_table_entry_fc1 {
+	unsigned long sfaa: 44;/* Segment-Frame Absolute Address */
+	unsigned long	  : 3;
+	unsigned long av  : 1; /* ACCF-Validity Control */
+	unsigned long acc : 4; /* Access-Control Bits */
+	unsigned long f   : 1; /* Fetch-Protection Bit */
+	unsigned long fc  : 1; /* Format-Control */
+	unsigned long p   : 1; /* DAT-Protection Bit */
+	unsigned long iep : 1; /* Instruction-Execution-Protection */
+	unsigned long	  : 2;
+	unsigned long i   : 1; /* Segment-Invalid Bit */
+	unsigned long cs  : 1; /* Common-Segment Bit */
+	unsigned long tt  : 2; /* Table-Type Bits */
+	unsigned long	  : 2;
+};
+
+union segment_table_entry {
+	unsigned long val;
+	struct segment_table_entry_fc0 fc0;
+	struct segment_table_entry_fc1 fc1;
+	struct {
+		unsigned long	: 53;
+		unsigned long fc: 1; /* Format-Control */
+		unsigned long	: 4;
+		unsigned long i : 1; /* Segment-Invalid Bit */
+		unsigned long cs: 1; /* Common-Segment Bit */
+		unsigned long tt: 2; /* Table-Type Bits */
+		unsigned long	: 2;
+	};
+};
+
+union page_table_entry {
+	unsigned long val;
+	struct {
+		unsigned long pfra: 52;/* Page-Frame Real Address */
+		unsigned long z   : 1; /* Zero Bit */
+		unsigned long i   : 1; /* Page-Invalid Bit */
+		unsigned long p   : 1; /* DAT-Protection Bit */
+		unsigned long iep : 1; /* Instruction-Execution-Protection */
+		unsigned long	  : 8;
+	};
+};
+
+enum {
+	TABLE_TYPE_SEGMENT = 0,
+	TABLE_TYPE_REGION3 = 1,
+	TABLE_TYPE_REGION2 = 2,
+	TABLE_TYPE_REGION1 = 3
+};
+
+#endif /* _S390_DAT_BITS_H */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 77f24262c25c..6375276d94ea 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -4,8 +4,8 @@
  *
  *    Copyright IBM Corp. 1999, 2020
  */
-#ifndef DEBUG_H
-#define DEBUG_H
+#ifndef _ASM_S390_DEBUG_H
+#define _ASM_S390_DEBUG_H
 
 #include <linux/string.h>
 #include <linux/spinlock.h>
@@ -66,14 +66,15 @@ typedef int (debug_header_proc_t) (debug_info_t *id,
 				   struct debug_view *view,
 				   int area,
 				   debug_entry_t *entry,
-				   char *out_buf);
+				   char *out_buf, size_t out_buf_size);
 
 typedef int (debug_format_proc_t) (debug_info_t *id,
 				   struct debug_view *view, char *out_buf,
+				   size_t out_buf_size,
 				   const char *in_buf);
 typedef int (debug_prolog_proc_t) (debug_info_t *id,
 				   struct debug_view *view,
-				   char *out_buf);
+				   char *out_buf, size_t out_buf_size);
 typedef int (debug_input_proc_t) (debug_info_t *id,
 				  struct debug_view *view,
 				  struct file *file,
@@ -81,8 +82,13 @@ typedef int (debug_input_proc_t) (debug_info_t *id,
 				  size_t in_buf_size, loff_t *offset);
 
 int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
-			 int area, debug_entry_t *entry, char *out_buf);
+			 int area, debug_entry_t *entry,
+			 char *out_buf, size_t out_buf_size);
 
+#define DEBUG_SPRINTF_MAX_ARGS 10
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+			    char *out_buf, size_t out_buf_size,
+			    const char *inbuf);
 struct debug_view {
 	char name[DEBUG_MAX_NAME_LEN];
 	debug_prolog_proc_t *prolog_proc;
@@ -112,6 +118,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
 				  int buf_size, umode_t mode, uid_t uid,
 				  gid_t gid);
 
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+		   char *buf, size_t buf_size, bool reverse);
+
 void debug_unregister(debug_info_t *id);
 
 void debug_set_level(debug_info_t *id, int new_level);
@@ -222,7 +231,7 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
 
 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
@@ -350,7 +359,7 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
 
 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
@@ -487,4 +496,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas);
 
 #endif /* MODULE */
 
-#endif /* DEBUG_H */
+#endif /* _ASM_S390_DEBUG_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 56e99c286d12..8db8db3b1018 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -12,6 +12,8 @@
 #include <linux/if_ether.h>
 #include <linux/percpu.h>
 #include <asm/asm-extable.h>
+#include <asm/sclp.h>
+#include <asm/cio.h>
 
 enum diag_stat_enum {
 	DIAG_STAT_X008,
@@ -20,6 +22,7 @@ enum diag_stat_enum {
 	DIAG_STAT_X014,
 	DIAG_STAT_X044,
 	DIAG_STAT_X064,
+	DIAG_STAT_X08C,
 	DIAG_STAT_X09C,
 	DIAG_STAT_X0DC,
 	DIAG_STAT_X204,
@@ -33,7 +36,11 @@ enum diag_stat_enum {
 	DIAG_STAT_X2FC,
 	DIAG_STAT_X304,
 	DIAG_STAT_X308,
+	DIAG_STAT_X310,
 	DIAG_STAT_X318,
+	DIAG_STAT_X320,
+	DIAG_STAT_X324,
+	DIAG_STAT_X49C,
 	DIAG_STAT_X500,
 	NR_DIAG_STAT
 };
@@ -41,6 +48,13 @@ enum diag_stat_enum {
 void diag_stat_inc(enum diag_stat_enum nr);
 void diag_stat_inc_norecursion(enum diag_stat_enum nr);
 
+struct hypfs_diag0c_entry;
+
+/*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data);
+
 /*
  * Diagnose 10: Release page range
  */
@@ -52,7 +66,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
 	end_addr = pfn_to_phys(start_pfn + num_pfn - 1);
 
 	diag_stat_inc(DIAG_STAT_X010);
-	asm volatile(
+	asm_inline volatile(
 		"0:	diag	%0,%1,0x10\n"
 		"1:	nopr	%%r7\n"
 		EX_TABLE(0b, 1b)
@@ -79,10 +93,20 @@ struct diag210 {
 	u8 vrdccrty;	/* real device type (output) */
 	u8 vrdccrmd;	/* real device model (output) */
 	u8 vrdccrft;	/* real device feature (output) */
-} __attribute__((packed, aligned(4)));
+} __packed __aligned(4);
 
 extern int diag210(struct diag210 *addr);
 
+struct diag8c {
+	u8 flags;
+	u8 num_partitions;
+	u16 width;
+	u16 height;
+	u8 data[];
+} __packed __aligned(4);
+
+extern int diag8c(struct diag8c *out, struct ccw_dev_id *devno);
+
 /* bit is set in flags, when physical cpu info is included in diag 204 data */
 #define DIAG204_LPAR_PHYS_FLG 0x80
 #define DIAG204_LPAR_NAME_LEN 8		/* lpar name len in diag 204 data */
@@ -96,6 +120,10 @@ enum diag204_sc {
 	DIAG204_SUBC_STIB7 = 7
 };
 
+#define DIAG204_SUBCODE_MASK 0xffff
+#define DIAG204_BIF_BIT 0x80000000
+#define DIAG204_BUSY_WAIT (HZ / 10)
+
 /* The two available diag 204 data formats */
 enum diag204_format {
 	DIAG204_INFO_SIMPLE = 0,
@@ -304,6 +332,11 @@ union diag318_info {
 	};
 };
 
+static inline bool diag204_has_bif(void)
+{
+	return sclp.has_diag204_bif;
+}
+
 int diag204(unsigned long subcode, unsigned long size, void *addr);
 int diag224(void *ptr);
 int diag26c(void *req, void *resp, enum diag26c_sc subcode);
@@ -316,9 +349,10 @@ struct hypfs_diag0c_entry;
  */
 struct diag_ops {
 	int (*diag210)(struct diag210 *addr);
-	int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
+	int (*diag26c)(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode);
 	int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
-	void (*diag0c)(struct hypfs_diag0c_entry *entry);
+	int (*diag8c)(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
+	void (*diag0c)(unsigned long rx);
 	void (*diag308_reset)(void);
 };
 
@@ -326,9 +360,18 @@ extern struct diag_ops diag_amode31_ops;
 extern struct diag210 *__diag210_tmp_amode31;
 
 int _diag210_amode31(struct diag210 *addr);
-int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode);
+int _diag26c_amode31(unsigned long rx, unsigned long rx1, enum diag26c_sc subcode);
 int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode);
-void _diag0c_amode31(struct hypfs_diag0c_entry *entry);
+void _diag0c_amode31(unsigned long rx);
 void _diag308_reset_amode31(void);
+int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len);
+
+/* diag 49c subcodes */
+enum diag49c_sc {
+	DIAG49C_SUBC_ACK = 0,
+	DIAG49C_SUBC_REG = 1
+};
+
+int diag49c(unsigned long subcode);
 
 #endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/diag288.h b/arch/s390/include/asm/diag288.h
new file mode 100644
index 000000000000..5e1b43cea9d6
--- /dev/null
+++ b/arch/s390/include/asm/diag288.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DIAG288_H
+#define _ASM_S390_DIAG288_H
+
+#include <asm/asm-extable.h>
+#include <asm/types.h>
+
+#define MIN_INTERVAL 15	    /* Minimal time supported by diag288 */
+#define MAX_INTERVAL 3600   /* One hour should be enough - pure estimation */
+
+#define WDT_DEFAULT_TIMEOUT 30
+
+/* Function codes - init, change, cancel */
+#define WDT_FUNC_INIT 0
+#define WDT_FUNC_CHANGE 1
+#define WDT_FUNC_CANCEL 2
+#define WDT_FUNC_CONCEAL 0x80000000
+
+/* Action codes for LPAR watchdog */
+#define LPARWDT_RESTART 0
+
+static inline int __diag288(unsigned int func, unsigned int timeout,
+			    unsigned long action, unsigned int len)
+{
+	union register_pair r1 = { .even = func, .odd = timeout, };
+	union register_pair r3 = { .even = action, .odd = len, };
+	int rc = -EINVAL;	/* stays -EINVAL unless the lhi at label 0 runs */
+
+	asm volatile(
+		"	diag	%[r1],%[r3],0x288\n"
+		"0:	lhi	%[rc],0\n"	/* only place rc is set to 0 */
+		"1:"
+		EX_TABLE(0b, 1b)	/* exception-table entry pairing labels 0 and 1 */
+		: [rc] "+d" (rc)
+		: [r1] "d" (r1.pair), [r3] "d" (r3.pair)
+		: "cc", "memory");
+	return rc;
+}
+
+#endif /* _ASM_S390_DIAG288_H */
diff --git a/arch/s390/include/asm/dma-types.h b/arch/s390/include/asm/dma-types.h
new file mode 100644
index 000000000000..5c5734e6946c
--- /dev/null
+++ b/arch/s390/include/asm/dma-types.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DMA_TYPES_H_
+#define _ASM_S390_DMA_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * typedef dma32_t
+ * Contains a 31 bit absolute address to a DMA capable piece of storage.
+ *
+ * For CIO, DMA addresses are always absolute addresses. These addresses tend
+ * to be used in architected memory blocks (like ORB, IDAW, MIDAW). Under
+ * certain circumstances 31 bit wide addresses must be used because the
+ * address must fit in 31 bits.
+ *
+ * This type is to be used when such fields can be modelled as 32 bit wide.
+ */
+typedef u32 __bitwise dma32_t;
+
+/*
+ * typedef dma64_t
+ * Contains a 64 bit absolute address to a DMA capable piece of storage.
+ *
+ * For CIO, DMA addresses are always absolute addresses. These addresses tend
+ * to be used in architected memory blocks (like ORB, IDAW, MIDAW).
+ *
+ * This type is to be used to model such 64 bit wide fields.
+ */
+typedef u64 __bitwise dma64_t;
+
+/*
+ * Although DMA addresses should be obtained using the DMA API, in cases when
+ * it is known that the first argument holds a virtual address that points to
+ * DMA-able 31 bit addressable storage, then this function can be safely used.
+ */
+static inline dma32_t virt_to_dma32(void *ptr)
+{
+	return (__force dma32_t)__pa32(ptr);
+}
+
+static inline void *dma32_to_virt(dma32_t addr)
+{
+	return __va((__force unsigned long)addr);
+}
+
+static inline dma32_t u32_to_dma32(u32 addr)
+{
+	return (__force dma32_t)addr;
+}
+
+static inline u32 dma32_to_u32(dma32_t addr)
+{
+	return (__force u32)addr;
+}
+
+static inline dma32_t dma32_add(dma32_t a, u32 b)
+{
+	return (__force dma32_t)((__force u32)a + b);
+}
+
+static inline dma32_t dma32_and(dma32_t a, u32 b)
+{
+	return (__force dma32_t)((__force u32)a & b);
+}
+
+/*
+ * Although DMA addresses should be obtained using the DMA API, in cases when
+ * it is known that the first argument holds a virtual address that points to
+ * DMA-able storage, then this function can be safely used.
+ */
+static inline dma64_t virt_to_dma64(void *ptr)
+{
+	return (__force dma64_t)__pa(ptr);
+}
+
+static inline void *dma64_to_virt(dma64_t addr)
+{
+	return __va((__force unsigned long)addr);
+}
+
+static inline dma64_t u64_to_dma64(u64 addr)
+{
+	return (__force dma64_t)addr;
+}
+
+static inline u64 dma64_to_u64(dma64_t addr)
+{
+	return (__force u64)addr;
+}
+
+static inline dma64_t dma64_add(dma64_t a, u64 b)
+{
+	return (__force dma64_t)((__force u64)a + b);
+}
+
+static inline dma64_t dma64_and(dma64_t a, u64 b)
+{
+	return (__force dma64_t)((__force u64)a & b);
+}
+
+#endif /* _ASM_S390_DMA_TYPES_H_ */
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
index 6f26f35d4a71..7fe3e31956d7 100644
--- a/arch/s390/include/asm/dma.h
+++ b/arch/s390/include/asm/dma.h
@@ -2,19 +2,13 @@
 #ifndef _ASM_S390_DMA_H
 #define _ASM_S390_DMA_H
 
-#include <asm/io.h>
+#include <linux/io.h>
 
 /*
  * MAX_DMA_ADDRESS is ambiguous because on s390 its completely unrelated
  * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
  * by the 31 bit heritage.
  */
-#define MAX_DMA_ADDRESS         0x80000000
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy	(0)
-#endif
+#define MAX_DMA_ADDRESS		__va(0x80000000)
 
 #endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 4f21ae561e4d..390906b8e386 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -9,6 +9,7 @@
 #define CFI_DEF_CFA_OFFSET	.cfi_def_cfa_offset
 #define CFI_ADJUST_CFA_OFFSET	.cfi_adjust_cfa_offset
 #define CFI_RESTORE		.cfi_restore
+#define CFI_REL_OFFSET		.cfi_rel_offset
 
 #ifdef CONFIG_AS_CFI_VAL_OFFSET
 #define CFI_VAL_OFFSET		.cfi_val_offset
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 06f795855af7..c4589ec4505e 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/device.h>
 #include <linux/blk_types.h>
+#include <asm/dma-types.h>
 
 struct arqb {
 	u64 data;
@@ -45,7 +46,7 @@ struct msb {
 	u16:12;
 	u16 bs:4;
 	u32 blk_count;
-	u64 data_addr;
+	dma64_t data_addr;
 	u64 scm_addr;
 	u64:64;
 } __packed;
@@ -54,7 +55,7 @@ struct aidaw {
 	u8 flags;
 	u32 :24;
 	u32 :32;
-	u64 data_addr;
+	dma64_t data_addr;
 } __packed;
 
 #define MSB_OC_CLEAR	0
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index efb50fc6866c..7164cb658435 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -22,18 +22,18 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
 static inline void
 codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr)
 {
-	if (nr-- <= 0)
+	if (!nr--)
 		return;
 	asm volatile(
-		"	bras	1,1f\n"
-		"	tr	0(1,%0),0(%2)\n"
-		"0:	tr	0(256,%0),0(%2)\n"
+		"	j	2f\n"
+		"0:	tr	0(1,%0),0(%2)\n"
+		"1:	tr	0(256,%0),0(%2)\n"
 		"	la	%0,256(%0)\n"
-		"1:	ahi	%1,-256\n"
-		"	jnm	0b\n"
-		"	ex	%1,0(1)"
+		"2:	aghi	%1,-256\n"
+		"	jnm	1b\n"
+		"	exrl	%1,0b"
 		: "+&a" (addr), "+&a" (nr)
-		: "a" (codepage) : "cc", "memory", "1");
+		: "a" (codepage) : "cc", "memory");
 }
 
 #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 70a30ae258b7..a03df312081e 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -91,6 +91,14 @@
 /* Keep this the last entry.  */
 #define R_390_NUM	61
 
+/*
+ * HWCAP flags - for AT_HWCAP
+ *
+ * Bits 32-63 are reserved for use by libc.
+ * Bit 31 is reserved and will be used by libc to determine if a second
+ * argument is passed to IFUNC resolvers. This will be implemented when
+ * there is a need for AT_HWCAP2.
+ */
 enum {
 	HWCAP_NR_ESAN3		= 0,
 	HWCAP_NR_ZARCH		= 1,
@@ -150,9 +158,6 @@ enum {
 #define ELF_DATA	ELFDATA2MSB
 #define ELF_ARCH	EM_S390
 
-/* s390 specific phdr types */
-#define PT_S390_PGSTE	0x70000000
-
 /*
  * ELF register definitions..
  */
@@ -183,35 +188,6 @@ typedef s390_compat_regs compat_elf_gregset_t;
 	 && (x)->e_ident[EI_CLASS] == ELF_CLASS)
 #define compat_start_thread	start_thread31
 
-struct arch_elf_state {
-	int rc;
-};
-
-#define INIT_ARCH_ELF_STATE { .rc = 0 }
-
-#define arch_check_elf(ehdr, interp, interp_ehdr, state) (0)
-#ifdef CONFIG_PGSTE
-#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state)	\
-({								\
-	struct arch_elf_state *_state = state;			\
-	if ((phdr)->p_type == PT_S390_PGSTE &&			\
-	    !page_table_allocate_pgste &&			\
-	    !test_thread_flag(TIF_PGSTE) &&			\
-	    !current->mm->context.alloc_pgste) {		\
-		set_thread_flag(TIF_PGSTE);			\
-		set_pt_regs_flag(task_pt_regs(current),		\
-				 PIF_EXECVE_PGSTE_RESTART);	\
-		_state->rc = -EAGAIN;				\
-	}							\
-	_state->rc;						\
-})
-#else
-#define arch_elf_pt_proc(ehdr, phdr, elf, interp, state)	\
-({								\
-	(state)->rc;						\
-})
-#endif
-
 /* For SVR4/S390 the function pointer to be registered with `atexit` is
    passed in R14. */
 #define ELF_PLAT_INIT(_r, load_addr) \
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 000de2b1e67a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -8,7 +8,7 @@
 #include <linux/processor.h>
 #include <linux/uaccess.h>
 #include <asm/timex.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
 #include <asm/pai.h>
 
 #define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
@@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
 
 static __always_inline void arch_exit_to_user_mode(void)
 {
-	if (test_cpu_flag(CIF_FPU))
-		__load_fpu_regs();
+	load_user_fpu_regs();
 
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
 		debug_user_asce(1);
@@ -55,14 +54,9 @@ static __always_inline void arch_exit_to_user_mode(void)
 static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 						  unsigned long ti_work)
 {
-	choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+	choose_random_kstack_offset(get_tod_clock_fast());
 }
 
 #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
 
-static inline bool on_thread_stack(void)
-{
-	return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
-}
-
 #endif
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
index 568fd81bb77b..e0a06060afdd 100644
--- a/arch/s390/include/asm/extmem.h
+++ b/arch/s390/include/asm/extmem.h
@@ -8,6 +8,13 @@
 #define _ASM_S390X_DCSS_H
 #ifndef __ASSEMBLY__
 
+/*
+ * A DCSS segment is defined as a contiguous range of pages using the DEFSEG
+ * command. The range start and end are page numbers with values less than or
+ * equal to 0x7ffffff (see CP Commands and Utilities Reference).
+ */
+#define MAX_DCSS_ADDR	(512UL * SZ_1G)
+
 /* possible values for segment type as returned by segment_info */
 #define SEG_TYPE_SW 0
 #define SEG_TYPE_EW 1
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 94b6919026df..5f5b1aa6c233 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -14,13 +14,12 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/preempt.h>
-
+#include <asm/alternative.h>
 #include <asm/lowcore.h>
 
 #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
 
 extern u64 stfle_fac_list[16];
-extern u64 alt_stfle_fac_list[16];
 
 static inline void __set_facility(unsigned long nr, void *facilities)
 {
@@ -40,33 +39,56 @@ static inline void __clear_facility(unsigned long nr, void *facilities)
 	ptr[nr >> 3] &= ~(0x80 >> (nr & 7));
 }
 
-static inline int __test_facility(unsigned long nr, void *facilities)
+static __always_inline bool __test_facility(unsigned long nr, void *facilities)
 {
 	unsigned char *ptr;
 
 	if (nr >= MAX_FACILITY_BIT)
-		return 0;
+		return false;
 	ptr = (unsigned char *) facilities + (nr >> 3);
 	return (*ptr & (0x80 >> (nr & 7))) != 0;
 }
 
 /*
+ * __test_facility_constant() generates a single instruction branch. If the
+ * tested facility is available (likely) the branch is patched into a nop.
+ *
+ * Do not use this function unless you know what you are doing. All users are
+ * supposed to use test_facility() which will do the right thing.
+ */
+static __always_inline bool __test_facility_constant(unsigned long nr)
+{
+	asm goto(
+		ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FACILITY(%[nr]))
+		:
+		: [nr] "i" (nr)
+		:
+		: l_no);
+	return true;
+l_no:
+	return false;
+}
+
+/*
  * The test_facility function uses the bit ordering where the MSB is bit 0.
  * That makes it easier to query facility bits with the bit number as
  * documented in the Principles of Operation.
  */
-static inline int test_facility(unsigned long nr)
+static __always_inline bool test_facility(unsigned long nr)
 {
 	unsigned long facilities_als[] = { FACILITIES_ALS };
 
-	if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) {
-		if (__test_facility(nr, &facilities_als))
-			return 1;
+	if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(nr)) {
+		if (nr < sizeof(facilities_als) * 8) {
+			if (__test_facility(nr, &facilities_als))
+				return true;
+		}
+		return __test_facility_constant(nr);
 	}
 	return __test_facility(nr, &stfle_fac_list);
 }
 
-static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
+static inline unsigned long __stfle_asm(u64 *fac_list, int size)
 {
 	unsigned long reg0 = size - 1;
 
@@ -74,7 +96,7 @@ static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
 		"	lgr	0,%[reg0]\n"
 		"	.insn	s,0xb2b00000,%[list]\n" /* stfle */
 		"	lgr	%[reg0],0\n"
-		: [reg0] "+&d" (reg0), [list] "+Q" (*stfle_fac_list)
+		: [reg0] "+&d" (reg0), [list] "+Q" (*fac_list)
 		:
 		: "memory", "cc", "0");
 	return reg0;
@@ -82,33 +104,39 @@ static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
 
 /**
  * stfle - Store facility list extended
- * @stfle_fac_list: array where facility list can be stored
+ * @fac_list: array where facility list can be stored
  * @size: size of passed in array in double words
  */
-static inline void __stfle(u64 *stfle_fac_list, int size)
+static inline void __stfle(u64 *fac_list, int size)
 {
 	unsigned long nr;
 	u32 stfl_fac_list;
 
 	asm volatile(
 		"	stfl	0(0)\n"
-		: "=m" (S390_lowcore.stfl_fac_list));
-	stfl_fac_list = S390_lowcore.stfl_fac_list;
-	memcpy(stfle_fac_list, &stfl_fac_list, 4);
+		: "=m" (get_lowcore()->stfl_fac_list));
+	stfl_fac_list = get_lowcore()->stfl_fac_list;
+	memcpy(fac_list, &stfl_fac_list, 4);
 	nr = 4; /* bytes stored by stfl */
 	if (stfl_fac_list & 0x01000000) {
 		/* More facility bits available with stfle */
-		nr = __stfle_asm(stfle_fac_list, size);
+		nr = __stfle_asm(fac_list, size);
 		nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
 	}
-	memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+	memset((char *)fac_list + nr, 0, size * 8 - nr);
 }
 
-static inline void stfle(u64 *stfle_fac_list, int size)
+static inline void stfle(u64 *fac_list, int size)
 {
 	preempt_disable();
-	__stfle(stfle_fac_list, size);
+	__stfle(fac_list, size);
 	preempt_enable();
 }
 
+/**
+ * stfle_size - Actual size of the facility list as specified by stfle
+ * (number of double words)
+ */
+unsigned int stfle_size(void);
+
 #endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fault.h b/arch/s390/include/asm/fault.h
new file mode 100644
index 000000000000..d326f56603d6
--- /dev/null
+++ b/arch/s390/include/asm/fault.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2023
+ */
+#ifndef _ASM_S390_FAULT_H
+#define _ASM_S390_FAULT_H
+
+union teid {
+	unsigned long val;
+	struct {
+		unsigned long addr : 52; /* Translation-exception Address */
+		unsigned long fsi  : 2;	 /* Access Exception Fetch/Store Indication */
+		unsigned long	   : 2;
+		unsigned long b56  : 1;
+		unsigned long	   : 3;
+		unsigned long b60  : 1;
+		unsigned long b61  : 1;
+		unsigned long as   : 2;	 /* ASCE Identifier */
+	};
+};
+
+enum {
+	TEID_FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+	TEID_FSI_STORE	 = 1, /* Exception was due to store operation */
+	TEID_FSI_FETCH	 = 2  /* Exception was due to fetch operation */
+};
+
+#endif /* _ASM_S390_FAULT_H */
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index b8a028a36173..80f82a739b45 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -10,6 +10,7 @@
 #define _ASM_S390_FCX_H
 
 #include <linux/types.h>
+#include <asm/dma-types.h>
 
 #define TCW_FORMAT_DEFAULT		0
 #define TCW_TIDAW_FORMAT_DEFAULT	0
@@ -43,16 +44,16 @@ struct tcw {
 	u32 r:1;
 	u32 w:1;
 	u32 :16;
-	u64 output;
-	u64 input;
-	u64 tsb;
-	u64 tccb;
+	dma64_t output;
+	dma64_t input;
+	dma64_t tsb;
+	dma64_t tccb;
 	u32 output_count;
 	u32 input_count;
 	u32 :32;
 	u32 :32;
 	u32 :32;
-	u32 intrg;
+	dma32_t intrg;
 } __attribute__ ((packed, aligned(64)));
 
 #define TIDAW_FLAGS_LAST		(1 << (7 - 0))
@@ -73,7 +74,7 @@ struct tidaw {
 	u32 flags:8;
 	u32 :24;
 	u32 count;
-	u64 addr;
+	dma64_t addr;
 } __attribute__ ((packed, aligned(16)));
 
 /**
@@ -286,7 +287,7 @@ struct tccb_tcat {
  */
 struct tccb {
 	struct tccb_tcah tcah;
-	u8 tca[0];
+	u8 tca[];
 } __attribute__ ((packed, aligned(8)));
 
 struct tcw *tcw_get_intrg(struct tcw *tcw);
diff --git a/arch/s390/include/asm/fprobe.h b/arch/s390/include/asm/fprobe.h
new file mode 100644
index 000000000000..5ef600b372f4
--- /dev/null
+++ b/arch/s390/include/asm/fprobe.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_FPROBE_H
+#define _ASM_S390_FPROBE_H
+
+#include <asm-generic/fprobe.h>
+
+#undef FPROBE_HEADER_MSB_PATTERN
+#define FPROBE_HEADER_MSB_PATTERN 0
+
+#endif /* _ASM_S390_FPROBE_H */
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/fpu-insn-asm.h
index 95480ed9149e..d296322be4bc 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -9,11 +9,14 @@
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
-#ifndef __ASM_S390_VX_INSN_H
-#define __ASM_S390_VX_INSN_H
+#ifndef __ASM_S390_FPU_INSN_ASM_H
+#define __ASM_S390_FPU_INSN_ASM_H
 
-#ifdef __ASSEMBLY__
+#ifndef __ASM_S390_FPU_INSN_H
+#error only <asm/fpu-insn.h> can be included directly
+#endif
 
+#ifdef __ASSEMBLY__
 
 /* Macros to generate vector instruction byte code */
 
@@ -192,10 +195,26 @@
 /* RXB - Compute most significant bit used vector registers
  *
  * @rxb:	Operand to store computed RXB value
- * @v1:		First vector register designated operand
- * @v2:		Second vector register designated operand
- * @v3:		Third vector register designated operand
- * @v4:		Fourth vector register designated operand
+ * @v1:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 0 (instruction bit 36) and whose remaining bits
+ *		are stored in instruction bits 8-11.
+ * @v2:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 1 (instruction bit 37) and whose remaining bits
+ *		are stored in instruction bits 12-15.
+ * @v3:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 2 (instruction bit 38) and whose remaining bits
+ *		are stored in instruction bits 16-19.
+ * @v4:		Vector register designated operand whose MSB is stored in
+ *		RXB bit 3 (instruction bit 39) and whose remaining bits
+ *		are stored in instruction bits 32-35.
+ *
+ * Note: In most vector instruction formats [1] V1, V2, V3, and V4 directly
+ * correspond to @v1, @v2, @v3, and @v4. But there are exceptions, such as but
+ * not limited to the vector instruction formats VRR-g, VRR-h, VRS-a, VRS-d,
+ * and VSI.
+ *
+ * [1] IBM z/Architecture Principles of Operation, chapter "Program
+ * Execution", section "Instructions", subsection "Instruction Formats".
  */
 .macro	RXB	rxb v1 v2=0 v3=0 v4=0
 	\rxb = 0
@@ -220,6 +239,9 @@
  * @v2:		Second vector register designated operand (for RXB)
  * @v3:		Third vector register designated operand (for RXB)
  * @v4:		Fourth vector register designated operand (for RXB)
+ *
+ * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro
+ * description for further details.
  */
 .macro	MRXB	m v1 v2=0 v3=0 v4=0
 	rxb = 0
@@ -235,6 +257,9 @@
  * @v2:		Second vector register designated operand (for RXB)
  * @v3:		Third vector register designated operand (for RXB)
  * @v4:		Fourth vector register designated operand (for RXB)
+ *
+ * Note: For @v1, @v2, @v3, and @v4 also refer to the RXB macro
+ * description for further details.
  */
 .macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
 	MRXB	\m, \v1, \v2, \v3, \v4
@@ -347,7 +372,7 @@
 	VX_NUM	v3, \vr
 	.word	0xE700 | (r1 << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
-	MRXBOPC	\m, 0x21, v3
+	MRXBOPC	\m, 0x21, 0, v3
 .endm
 .macro	VLGVB	gr, vr, disp, base="%r0"
 	VLGV	\gr, \vr, \disp, \base, 0
@@ -382,6 +407,28 @@
 	MRXBOPC	0, 0x0E, v1
 .endm
 
+/* VECTOR STORE BYTE REVERSED ELEMENTS */
+.macro	VSTBR	vr1, disp, index="%r0", base, m
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE600 | ((v1&15) << 4) | (x2&15)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x0E, v1
+.endm
+.macro	VSTBRH	vr1, disp, index="%r0", base
+	VSTBR	\vr1, \disp, \index, \base, 1
+.endm
+.macro	VSTBRF	vr1, disp, index="%r0", base
+	VSTBR	\vr1, \disp, \index, \base, 2
+.endm
+.macro	VSTBRG	vr1, disp, index="%r0", base
+	VSTBR	\vr1, \disp, \index, \base, 3
+.endm
+.macro	VSTBRQ	vr1, disp, index="%r0", base
+	VSTBR	\vr1, \disp, \index, \base, 4
+.endm
+
 /* VECTOR STORE MULTIPLE */
 .macro	VSTM	vfrom, vto, disp, base, hint=3
 	VX_NUM	v1, \vfrom
@@ -496,6 +543,25 @@
 	VMRL	\vr1, \vr2, \vr3, 3
 .endm
 
+/* VECTOR LOAD WITH LENGTH */
+.macro VLL	v, gr, disp, base
+	VX_NUM	v1, \v
+	GR_NUM	b2, \base
+	GR_NUM	r3, \gr
+	.word	0xE700 | ((v1&15) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x37, v1
+.endm
+
+/* VECTOR STORE WITH LENGTH */
+.macro VSTL	v, gr, disp, base
+	VX_NUM	v1, \v
+	GR_NUM	b2, \base
+	GR_NUM	r3, \gr
+	.word	0xE700 | ((v1&15) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x3f, v1
+.endm
 
 /* Vector integer instructions */
 
@@ -509,6 +575,16 @@
 	MRXBOPC	0, 0x68, v1, v2, v3
 .endm
 
+/* VECTOR CHECKSUM */
+.macro VCKSM	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC 0, 0x66, v1, v2, v3
+.endm
+
 /* VECTOR EXCLUSIVE OR */
 .macro	VX	vr1, vr2, vr3
 	VX_NUM	v1, \vr1
@@ -675,4 +751,4 @@
 .endm
 
 #endif	/* __ASSEMBLY__ */
-#endif	/* __ASM_S390_VX_INSN_H */
+#endif	/* __ASM_S390_FPU_INSN_ASM_H */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h
new file mode 100644
index 000000000000..f668bffd6dd3
--- /dev/null
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -0,0 +1,479 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for Floating Point and Vector Instructions
+ *
+ */
+
+#ifndef __ASM_S390_FPU_INSN_H
+#define __ASM_S390_FPU_INSN_H
+
+#include <asm/fpu-insn-asm.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/instrumented.h>
+#include <asm/asm-extable.h>
+
+asm(".include \"asm/fpu-insn-asm.h\"\n");
+
+/*
+ * Various small helper functions, which can and should be used within
+ * kernel fpu code sections. Each function represents only one floating
+ * point or vector instruction (except for helper functions which require
+ * exception handling).
+ *
+ * This allows to use floating point and vector instructions like C
+ * functions, which has the advantage that all supporting code, like
+ * e.g. loops, can be written in easy to read C code.
+ *
+ * Each of the helper functions provides support for code instrumentation,
+ * like e.g. KASAN. Therefore instrumentation is also covered automatically
+ * when using these functions.
+ *
+ * In order to ensure that code generated with the helper functions stays
+ * within kernel fpu sections, which are guarded with kernel_fpu_begin()
+ * and kernel_fpu_end() calls, each function has a mandatory "memory"
+ * barrier.
+ */
+
+static __always_inline void fpu_cefbr(u8 f1, s32 val)
+{
+	asm volatile("cefbr	%[f1],%[val]\n"
+		     :
+		     : [f1] "I" (f1), [val] "d" (val)
+		     : "memory");
+}
+
+static __always_inline unsigned long fpu_cgebr(u8 f2, u8 mode)
+{
+	unsigned long val;
+
+	asm volatile("cgebr	%[val],%[mode],%[f2]\n"
+		     : [val] "=d" (val)
+		     : [f2] "I" (f2), [mode] "I" (mode)
+		     : "memory");
+	return val;
+}
+
+static __always_inline void fpu_debr(u8 f1, u8 f2)
+{
+	asm volatile("debr	%[f1],%[f2]\n"
+		     :
+		     : [f1] "I" (f1), [f2] "I" (f2)
+		     : "memory");
+}
+
+static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
+{
+	instrument_read(reg, sizeof(*reg));
+	asm volatile("ld	 %[fpr],%[reg]\n"
+		     :
+		     : [fpr] "I" (fpr), [reg] "Q" (reg->ui)
+		     : "memory");
+}
+
+static __always_inline void fpu_ldgr(u8 f1, u32 val)
+{
+	asm volatile("ldgr	%[f1],%[val]\n"
+		     :
+		     : [f1] "I" (f1), [val] "d" (val)
+		     : "memory");
+}
+
+static __always_inline void fpu_lfpc(unsigned int *fpc)
+{
+	instrument_read(fpc, sizeof(*fpc));
+	asm volatile("lfpc	%[fpc]"
+		     :
+		     : [fpc] "Q" (*fpc)
+		     : "memory");
+}
+
+/**
+ * fpu_lfpc_safe - Load floating point control register safely.
+ * @fpc: new value for floating point control register
+ *
+ * Load floating point control register. This may lead to an exception,
+ * since a saved value may have been modified by user space (ptrace,
+ * signal return, kvm registers) to an invalid value. In such a case
+ * set the floating point control register to zero.
+ */
+static inline void fpu_lfpc_safe(unsigned int *fpc)
+{
+	instrument_read(fpc, sizeof(*fpc));
+	asm_inline volatile(
+		"	lfpc	%[fpc]\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE_FPC(0b, 0b)
+		:
+		: [fpc] "Q" (*fpc)
+		: "memory");
+}
+
+static __always_inline void fpu_std(unsigned short fpr, freg_t *reg)
+{
+	instrument_write(reg, sizeof(*reg));
+	asm volatile("std	 %[fpr],%[reg]\n"
+		     : [reg] "=Q" (reg->ui)
+		     : [fpr] "I" (fpr)
+		     : "memory");
+}
+
+static __always_inline void fpu_sfpc(unsigned int fpc)
+{
+	asm volatile("sfpc	%[fpc]"
+		     :
+		     : [fpc] "d" (fpc)
+		     : "memory");
+}
+
+static __always_inline void fpu_stfpc(unsigned int *fpc)
+{
+	instrument_write(fpc, sizeof(*fpc));
+	asm volatile("stfpc	%[fpc]"
+		     : [fpc] "=Q" (*fpc)
+		     :
+		     : "memory");
+}
+
+static __always_inline void fpu_vab(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VAB	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VCKSM	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vesravb(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VESRAVB	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vgfmag(u8 v1, u8 v2, u8 v3, u8 v4)
+{
+	asm volatile("VGFMAG	%[v1],%[v2],%[v3],%[v4]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4)
+		     : "memory");
+}
+
+static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VGFMG	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+	instrument_read(vxr, sizeof(__vector128));
+	asm volatile("VL	%[v1],%O[vxr],,%R[vxr]\n"
+		     :
+		     : [vxr] "Q" (*(__vector128 *)vxr),
+		       [v1] "I" (v1)
+		     : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vl(u8 v1, const void *vxr)
+{
+	instrument_read(vxr, sizeof(__vector128));
+	asm volatile(
+		"	la	1,%[vxr]\n"
+		"	VL	%[v1],0,,1\n"
+		:
+		: [vxr] "R" (*(__vector128 *)vxr),
+		  [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vleib(u8 v, s16 val, u8 index)
+{
+	asm volatile("VLEIB	%[v],%[val],%[index]"
+		     :
+		     : [v] "I" (v), [val] "K" (val), [index] "I" (index)
+		     : "memory");
+}
+
+static __always_inline void fpu_vleig(u8 v, s16 val, u8 index)
+{
+	asm volatile("VLEIG	%[v],%[val],%[index]"
+		     :
+		     : [v] "I" (v), [val] "K" (val), [index] "I" (index)
+		     : "memory");
+}
+
+static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
+{
+	u64 val;
+
+	asm volatile("VLGVF	%[val],%[v],%[index]"
+		     : [val] "=d" (val)
+		     : [v] "I" (v), [index] "L" (index)
+		     : "memory");
+	return val;
+}
+
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
+{
+	/* Clamp first: index + 1 would wrap to 0 for index == U32_MAX. */
+	unsigned int size = min(index, sizeof(__vector128) - 1) + 1;
+
+	instrument_read(vxr, size);
+	asm volatile("VLL	%[v1],%[index],%O[vxr],%R[vxr]\n"
+		     :
+		     : [vxr] "Q" (*(u8 *)vxr),
+		       [index] "d" (index),
+		       [v1] "I" (v1)
+		     : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
+{
+	/* Clamp first: index + 1 would wrap to 0 for index == U32_MAX. */
+	unsigned int size = min(index, sizeof(__vector128) - 1) + 1;
+
+	instrument_read(vxr, size);
+	asm volatile(
+		"	la	1,%[vxr]\n"
+		"	VLL	%[v1],%[index],0,1\n"
+		:
+		: [vxr] "R" (*(u8 *)vxr),
+		  [index] "d" (index),
+		  [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+#define fpu_vlm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_read(_v, size);					\
+	asm volatile("VLM	%[v1],%[v3],%O[vxrs],%R[vxrs]\n"	\
+		     :							\
+		     : [vxrs] "Q" (*_v),				\
+		       [v1] "I" (_v1), [v3] "I" (_v3)			\
+		     : "memory");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#define fpu_vlm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_read(_v, size);					\
+	asm volatile(							\
+		"	la	1,%[vxrs]\n"				\
+		"	VLM	%[v1],%[v3],0,1\n"			\
+		:							\
+		: [vxrs] "R" (*_v),					\
+		  [v1] "I" (_v1), [v3] "I" (_v3)			\
+		: "memory", "1");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vlr(u8 v1, u8 v2)
+{
+	asm volatile("VLR	%[v1],%[v2]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2)
+		     : "memory");
+}
+
+static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
+{
+	asm volatile("VLVGF	%[v],%[val],%[index]"
+		     :
+		     : [v] "I" (v), [val] "d" (val), [index] "L" (index)
+		     : "memory");
+}
+
+static __always_inline void fpu_vn(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VN	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vperm(u8 v1, u8 v2, u8 v3, u8 v4)
+{
+	asm volatile("VPERM	%[v1],%[v2],%[v3],%[v4]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3), [v4] "I" (v4)
+		     : "memory");
+}
+
+static __always_inline void fpu_vrepib(u8 v1, s16 i2)
+{
+	asm volatile("VREPIB	%[v1],%[i2]"
+		     :
+		     : [v1] "I" (v1), [i2] "K" (i2)
+		     : "memory");
+}
+
+static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VSRLB	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+	instrument_write(vxr, sizeof(__vector128));
+	asm volatile("VST	%[v1],%O[vxr],,%R[vxr]\n"
+		     : [vxr] "=Q" (*(__vector128 *)vxr)
+		     : [v1] "I" (v1)
+		     : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+	instrument_write(vxr, sizeof(__vector128));
+	asm volatile(
+		"	la	1,%[vxr]\n"
+		"	VST	%[v1],0,,1\n"
+		: [vxr] "=R" (*(__vector128 *)vxr)
+		: [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+	/* Clamp first: index + 1 would wrap to 0 for index == U32_MAX. */
+	unsigned int size = min(index, sizeof(__vector128) - 1) + 1;
+
+	instrument_write(vxr, size);
+	asm volatile("VSTL	%[v1],%[index],%O[vxr],%R[vxr]\n"
+		     : [vxr] "=Q" (*(u8 *)vxr)
+		     : [index] "d" (index), [v1] "I" (v1)
+		     : "memory");
+}
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+	/* Clamp first: index + 1 would wrap to 0 for index == U32_MAX. */
+	unsigned int size = min(index, sizeof(__vector128) - 1) + 1;
+
+	instrument_write(vxr, size);
+	asm volatile(
+		"	la	1,%[vxr]\n"
+		"	VSTL	%[v1],%[index],0,1\n"
+		: [vxr] "=R" (*(u8 *)vxr)
+		: [index] "d" (index), [v1] "I" (v1)
+		: "memory", "1");
+}
+
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS
+
+#define fpu_vstm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_write(_v, size);					\
+	asm volatile("VSTM	%[v1],%[v3],%O[vxrs],%R[vxrs]\n"	\
+		     : [vxrs] "=Q" (*_v)				\
+		     : [v1] "I" (_v1), [v3] "I" (_v3)			\
+		     : "memory");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#define fpu_vstm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_write(_v, size);					\
+	asm volatile(							\
+		"	la	1,%[vxrs]\n"				\
+		"	VSTM	%[v1],%[v3],0,1\n"			\
+		: [vxrs] "=R" (*_v)					\
+		: [v1] "I" (_v1), [v3] "I" (_v3)			\
+		: "memory", "1");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+static __always_inline void fpu_vupllf(u8 v1, u8 v2)
+{
+	asm volatile("VUPLLF	%[v1],%[v2]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2)
+		     : "memory");
+}
+
+static __always_inline void fpu_vx(u8 v1, u8 v2, u8 v3)
+{
+	asm volatile("VX	%[v1],%[v2],%[v3]"
+		     :
+		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
+		     : "memory");
+}
+
+static __always_inline void fpu_vzero(u8 v)
+{
+	asm volatile("VZERO	%[v]"
+		     :
+		     : [v] "I" (v)
+		     : "memory");
+}
+
+#endif /* __ASSEMBLY__ */
+#endif	/* __ASM_S390_FPU_INSN_H */
diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h
new file mode 100644
index 000000000000..8d58d5a95399
--- /dev/null
+++ b/arch/s390/include/asm/fpu-types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+	u32 fpc;
+	__vector128 vxrs[__NUM_VXRS] __aligned(8);
+};
+
+struct kernel_fpu_hdr {
+	int	mask;
+	u32	fpc;
+};
+
+struct kernel_fpu {
+	struct kernel_fpu_hdr hdr;
+	__vector128 vxrs[] __aligned(8);
+};
+
+#define KERNEL_FPU_STRUCT(vxr_size)				\
+struct kernel_fpu_##vxr_size {					\
+	struct kernel_fpu_hdr hdr;				\
+	__vector128 vxrs[vxr_size] __aligned(8);		\
+}
+
+KERNEL_FPU_STRUCT(8);
+KERNEL_FPU_STRUCT(16);
+KERNEL_FPU_STRUCT(32);
+
+#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name)		\
+	struct kernel_fpu_##vxr_size name __uninitialized
+
+#define DECLARE_KERNEL_FPU_ONSTACK8(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(8, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK16(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(16, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK32(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(32, name)
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
new file mode 100644
index 000000000000..960c6c67ad6c
--- /dev/null
+++ b/arch/s390/include/asm/fpu.h
@@ -0,0 +1,290 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * In-kernel FPU support functions
+ *
+ *
+ * Consider these guidelines before using in-kernel FPU functions:
+ *
+ *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
+ *     use of floating-point or vector registers and instructions.
+ *
+ *  2. For kernel_fpu_begin(), specify the vector register range you want to
+ *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
+ *
+ *     a) If your function typically runs in process-context, use the lower
+ *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
+ *     b) If your function typically runs in soft-irq or hard-irq context,
+ *	  prefer using the upper half of the vector registers, for example,
+ *	  specify KERNEL_VXR_HIGH.
+ *
+ *     If you adhere to these guidelines, the vector registers of an
+ *     interrupted process context do not need to be saved and restored,
+ *     because the register ranges are disjoint.
+ *
+ *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
+ *     include logic to save and restore up to 16 vector registers at once.
+ *
+ *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
+ *     struct kernel_fpu states.  Vector registers that are in use by outer
+ *     levels are saved and restored.  You can minimize the save and restore
+ *     effort by choosing disjoint vector register ranges.
+ *
+ *  4. To use vector floating-point instructions, specify the KERNEL_FPC
+ *     flag to save and restore floating-point controls in addition to any
+ *     vector register range.
+ *
+ *  5. To use floating-point registers and instructions only, specify the
+ *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
+ *     registers V0 to V15 and floating-point controls.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_H
+#define _ASM_S390_FPU_H
+
+#include <linux/cpufeature.h>
+#include <linux/processor.h>
+#include <linux/preempt.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <asm/sigcontext.h>
+#include <asm/fpu-types.h>
+#include <asm/fpu-insn.h>
+
+enum {
+	KERNEL_FPC_BIT = 0,
+	KERNEL_VXR_V0V7_BIT,
+	KERNEL_VXR_V8V15_BIT,
+	KERNEL_VXR_V16V23_BIT,
+	KERNEL_VXR_V24V31_BIT,
+};
+
+#define KERNEL_FPC		BIT(KERNEL_FPC_BIT)
+#define KERNEL_VXR_V0V7		BIT(KERNEL_VXR_V0V7_BIT)
+#define KERNEL_VXR_V8V15	BIT(KERNEL_VXR_V8V15_BIT)
+#define KERNEL_VXR_V16V23	BIT(KERNEL_VXR_V16V23_BIT)
+#define KERNEL_VXR_V24V31	BIT(KERNEL_VXR_V24V31_BIT)
+
+#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7   | KERNEL_VXR_V8V15)
+#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15  | KERNEL_VXR_V16V23)
+#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)
+
+#define KERNEL_VXR		(KERNEL_VXR_LOW	   | KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC	   | KERNEL_VXR_LOW)
+
+void load_fpu_state(struct fpu *state, int flags);
+void save_fpu_state(struct fpu *state, int flags);
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
+void __kernel_fpu_end(struct kernel_fpu *state, int flags);
+
+static __always_inline void save_vx_regs(__vector128 *vxrs)
+{
+	fpu_vstm(0, 15, &vxrs[0]);
+	fpu_vstm(16, 31, &vxrs[16]);
+}
+
+static __always_inline void load_vx_regs(__vector128 *vxrs)
+{
+	fpu_vlm(0, 15, &vxrs[0]);
+	fpu_vlm(16, 31, &vxrs[16]);
+}
+
+static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
+{
+	fpu_std(0, &fprs[0 * offset]);
+	fpu_std(1, &fprs[1 * offset]);
+	fpu_std(2, &fprs[2 * offset]);
+	fpu_std(3, &fprs[3 * offset]);
+	fpu_std(4, &fprs[4 * offset]);
+	fpu_std(5, &fprs[5 * offset]);
+	fpu_std(6, &fprs[6 * offset]);
+	fpu_std(7, &fprs[7 * offset]);
+	fpu_std(8, &fprs[8 * offset]);
+	fpu_std(9, &fprs[9 * offset]);
+	fpu_std(10, &fprs[10 * offset]);
+	fpu_std(11, &fprs[11 * offset]);
+	fpu_std(12, &fprs[12 * offset]);
+	fpu_std(13, &fprs[13 * offset]);
+	fpu_std(14, &fprs[14 * offset]);
+	fpu_std(15, &fprs[15 * offset]);
+}
+
+static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
+{
+	fpu_ld(0, &fprs[0 * offset]);
+	fpu_ld(1, &fprs[1 * offset]);
+	fpu_ld(2, &fprs[2 * offset]);
+	fpu_ld(3, &fprs[3 * offset]);
+	fpu_ld(4, &fprs[4 * offset]);
+	fpu_ld(5, &fprs[5 * offset]);
+	fpu_ld(6, &fprs[6 * offset]);
+	fpu_ld(7, &fprs[7 * offset]);
+	fpu_ld(8, &fprs[8 * offset]);
+	fpu_ld(9, &fprs[9 * offset]);
+	fpu_ld(10, &fprs[10 * offset]);
+	fpu_ld(11, &fprs[11 * offset]);
+	fpu_ld(12, &fprs[12 * offset]);
+	fpu_ld(13, &fprs[13 * offset]);
+	fpu_ld(14, &fprs[14 * offset]);
+	fpu_ld(15, &fprs[15 * offset]);
+}
+
+static __always_inline void save_fp_regs(freg_t *fprs)
+{
+	__save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs(freg_t *fprs)
+{
+	__load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
+{
+	freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+	__save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
+{
+	freg_t *fprs = (freg_t *)&vxrs[0].high;
+
+	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
+}
+
+static inline void load_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	if (!thread->ufpu_flags)
+		return;
+	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
+	thread->ufpu_flags = 0;
+}
+
+static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
+{
+	save_fpu_state(&thread->ufpu, flags);
+	__atomic_or(flags, &thread->ufpu_flags);
+}
+
+static inline void save_user_fpu_regs(void)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, flags;
+
+	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
+	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
+	if (flags)
+		__save_user_fpu_regs(thread, flags);
+	barrier();
+	WRITE_ONCE(thread->kfpu_flags, mask);
+}
+
+static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
+{
+	struct thread_struct *thread = &current->thread;
+	int mask, uflags;
+
+	mask = __atomic_or(flags, &thread->kfpu_flags);
+	state->hdr.mask = mask;
+	uflags = READ_ONCE(thread->ufpu_flags);
+	if ((uflags & flags) != flags)
+		__save_user_fpu_regs(thread, ~uflags & flags);
+	if (mask & flags)
+		__kernel_fpu_begin(state, flags);
+}
+
+static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
+{
+	int mask = state->hdr.mask;
+
+	if (mask & flags)
+		__kernel_fpu_end(state, flags);
+	barrier();
+	WRITE_ONCE(current->thread.kfpu_flags, mask);
+}
+
+void __kernel_fpu_invalid_size(void);
+
+static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
+{
+	unsigned int nr_vxrs;
+
+	/* Every selected register range accounts for eight vector registers. */
+	nr_vxrs = (flags & KERNEL_VXR_V0V7) ? 8 : 0;
+	nr_vxrs += (flags & KERNEL_VXR_V8V15) ? 8 : 0;
+	nr_vxrs += (flags & KERNEL_VXR_V16V23) ? 8 : 0;
+	nr_vxrs += (flags & KERNEL_VXR_V24V31) ? 8 : 0;
+
+	/* Any other size ends in a call to __kernel_fpu_invalid_size(). */
+	if (nr_vxrs == size)
+		return;
+	__kernel_fpu_invalid_size();
+}
+
+#define kernel_fpu_begin(state, flags)					\
+do {									\
+	typeof(state) _s = (state);					\
+	int _flags = (flags);						\
+	/* vxrs[] size must match the register ranges in flags */	\
+	kernel_fpu_check_size(_flags, ARRAY_SIZE(_s->vxrs));		\
+	_kernel_fpu_begin((struct kernel_fpu *)_s, _flags);		\
+} while (0)
+
+#define kernel_fpu_end(state, flags)					\
+do {									\
+	typeof(state) _s = (state);					\
+	int _flags = (flags);						\
+	/* vxrs[] size must match the register ranges in flags */	\
+	kernel_fpu_check_size(_flags, ARRAY_SIZE(_s->vxrs));		\
+	_kernel_fpu_end((struct kernel_fpu *)_s, _flags);		\
+} while (0)
+
+static inline void save_kernel_fpu_regs(struct thread_struct *thread)
+{
+	/* Skip the save when no kfpu_flags bits are set. */
+	if (thread->kfpu_flags)
+		save_fpu_state(&thread->kfpu, thread->kfpu_flags);
+}
+
+static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
+{
+	/* Skip the reload when no kfpu_flags bits are set. */
+	if (thread->kfpu_flags)
+		load_fpu_state(&thread->kfpu, thread->kfpu_flags);
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i].ui = vxrs[i].high;
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		vxrs[i].high = fprs[i].ui;
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	fpregs->fpc = fpu->fpc;
+	convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpu->fpc = fpregs->fpc;
+	convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+}
+
+#endif /* _ASM_S390_FPU_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
deleted file mode 100644
index b714ed0ef688..000000000000
--- a/arch/s390/include/asm/fpu/api.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * In-kernel FPU support functions
- *
- *
- * Consider these guidelines before using in-kernel FPU functions:
- *
- *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
- *     use of floating-point or vector registers and instructions.
- *
- *  2. For kernel_fpu_begin(), specify the vector register range you want to
- *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
- *
- *     a) If your function typically runs in process-context, use the lower
- *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
- *     b) If your function typically runs in soft-irq or hard-irq context,
- *	  prefer using the upper half of the vector registers, for example,
- *	  specify KERNEL_VXR_HIGH.
- *
- *     If you adhere to these guidelines, an interrupted process context
- *     does not require to save and restore vector registers because of
- *     disjoint register ranges.
- *
- *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
- *     includes logic to save and restore up to 16 vector registers at once.
- *
- *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
- *     struct kernel_fpu states.  Vector registers that are in use by outer
- *     levels are saved and restored.  You can minimize the save and restore
- *     effort by choosing disjoint vector register ranges.
- *
- *  5. To use vector floating-point instructions, specify the KERNEL_FPC
- *     flag to save and restore floating-point controls in addition to any
- *     vector register range.
- *
- *  6. To use floating-point registers and instructions only, specify the
- *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
- *     registers V0 to V15 and floating-point controls.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_API_H
-#define _ASM_S390_FPU_API_H
-
-#include <linux/preempt.h>
-#include <asm/asm-extable.h>
-
-void save_fpu_regs(void);
-void load_fpu_regs(void);
-void __load_fpu_regs(void);
-
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=&d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
-
-#define KERNEL_FPC		1
-#define KERNEL_VXR_V0V7		2
-#define KERNEL_VXR_V8V15	4
-#define KERNEL_VXR_V16V23	8
-#define KERNEL_VXR_V24V31	16
-
-#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
-#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
-#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
-
-#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
-#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
-
-struct kernel_fpu;
-
-/*
- * Note the functions below must be called with preemption disabled.
- * Do not enable preemption before calling __kernel_fpu_end() to prevent
- * an corruption of an existing kernel FPU state.
- *
- * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
- */
-void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
-
-
-static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
-{
-	preempt_disable();
-	state->mask = S390_lowcore.fpu_flags;
-	if (!test_cpu_flag(CIF_FPU))
-		/* Save user space FPU state and register contents */
-		save_fpu_regs();
-	else if (state->mask & flags)
-		/* Save FPU/vector register in-use by the kernel */
-		__kernel_fpu_begin(state, flags);
-	S390_lowcore.fpu_flags |= flags;
-}
-
-static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
-{
-	S390_lowcore.fpu_flags = state->mask;
-	if (state->mask & flags)
-		/* Restore FPU/vector register in-use by the kernel */
-		__kernel_fpu_end(state, flags);
-	preempt_enable();
-}
-
-#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
deleted file mode 100644
index 4a71dbbf76fb..000000000000
--- a/arch/s390/include/asm/fpu/internal.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * FPU state and register content conversion primitives
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_INTERNAL_H
-#define _ASM_S390_FPU_INTERNAL_H
-
-#include <linux/string.h>
-#include <asm/ctl_reg.h>
-#include <asm/fpu/types.h>
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(struct vx_array *) vxrs) : : "1");
-}
-
-static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
-{
-	int i;
-
-	for (i = 0; i < __NUM_FPRS; i++)
-		fprs[i] = *(freg_t *)(vxrs + i);
-}
-
-static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
-{
-	int i;
-
-	for (i = 0; i < __NUM_FPRS; i++)
-		*(freg_t *)(vxrs + i) = fprs[i];
-}
-
-static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
-	fpregs->pad = 0;
-	fpregs->fpc = fpu->fpc;
-	if (MACHINE_HAS_VX)
-		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
-	else
-		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
-		       sizeof(fpregs->fprs));
-}
-
-static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
-{
-	fpu->fpc = fpregs->fpc;
-	if (MACHINE_HAS_VX)
-		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
-	else
-		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
-		       sizeof(fpregs->fprs));
-}
-
-#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
deleted file mode 100644
index d889e9436865..000000000000
--- a/arch/s390/include/asm/fpu/types.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * FPU data structures
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-
-#ifndef _ASM_S390_FPU_TYPES_H
-#define _ASM_S390_FPU_TYPES_H
-
-#include <asm/sigcontext.h>
-
-struct fpu {
-	__u32 fpc;		/* Floating-point control */
-	void *regs;		/* Pointer to the current save area */
-	union {
-		/* Floating-point register save area */
-		freg_t fprs[__NUM_FPRS];
-		/* Vector register save area */
-		__vector128 vxrs[__NUM_VXRS];
-	};
-};
-
-/* VX array structure for address operand constraints in inline assemblies */
-struct vx_array { __vector128 _[__NUM_VXRS]; };
-
-/* In-kernel FPU state structure */
-struct kernel_fpu {
-	u32	    mask;
-	u32	    fpc;
-	union {
-		freg_t fprs[__NUM_FPRS];
-		__vector128 vxrs[__NUM_VXRS];
-	};
-};
-
-#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 6f80ec9c04be..185331e91f83 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -2,18 +2,28 @@
 #ifndef _ASM_S390_FTRACE_H
 #define _ASM_S390_FTRACE_H
 
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #define MCOUNT_INSN_SIZE	6
 
 #ifndef __ASSEMBLY__
+#include <asm/stacktrace.h>
 
-#ifdef CONFIG_CC_IS_CLANG
-/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
-#define ftrace_return_address(n) 0UL
-#else
-#define ftrace_return_address(n) __builtin_return_address(n)
-#endif
+static __always_inline unsigned long return_address(unsigned int n)
+{
+	struct stack_frame *sf;
+
+	if (!n)
+		return (unsigned long)__builtin_return_address(0);
+
+	sf = (struct stack_frame *)current_frame_address();
+	do {
+		sf = (struct stack_frame *)sf->back_chain;
+		if (!sf)
+			return 0;
+	} while (--n);
+	return sf->gprs[8];
+}
+#define ftrace_return_address(n) return_address(n)
 
 void ftrace_caller(void);
 
@@ -29,6 +39,7 @@ struct dyn_arch_ftrace { };
 
 struct module;
 struct dyn_ftrace;
+struct ftrace_ops;
 
 bool ftrace_need_init_nop(void);
 #define ftrace_need_init_nop ftrace_need_init_nop
@@ -40,26 +51,46 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	return addr;
 }
+#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip))
 
-struct ftrace_regs {
-	struct pt_regs regs;
-};
+#include <linux/ftrace_regs.h>
 
 static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
 {
-	struct pt_regs *regs = &fregs->regs;
+	struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
 
 	if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS))
 		return regs;
 	return NULL;
 }
 
-static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs,
-							   unsigned long ip)
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+				    unsigned long ip)
+{
+	arch_ftrace_regs(fregs)->regs.psw.addr = ip;
+}
+
+#undef ftrace_regs_get_frame_pointer
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
 {
-	fregs->regs.psw.addr = ip;
+	return ftrace_regs_get_stack_pointer(fregs);
 }
 
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+	return arch_ftrace_regs(fregs)->regs.gprs[14];
+}
+
+#define arch_ftrace_fill_perf_regs(fregs, _regs)	 do {		\
+		(_regs)->psw.mask = 0;					\
+		(_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr;		\
+		(_regs)->gprs[15] = arch_ftrace_regs(fregs)->regs.gprs[15];		\
+	} while (0)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 /*
  * When an ftrace registered caller is tracing a function that is
  * also set by a register_ftrace_direct() call, it needs to be
@@ -67,10 +98,12 @@ static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *f
  * place the direct caller in the ORIG_GPR2 part of pt_regs. This
  * tells the ftrace_caller that there's a direct caller.
  */
-static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
 {
+	struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
 	regs->orig_gpr2 = addr;
 }
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
 /*
  * Even though the system call numbers are identical for s390/s390x a
@@ -97,6 +130,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
 	return !strcmp(sym + 7, name) || !strcmp(sym + 8, name);
 }
 
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index e08c882dccaa..942f21c39697 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -2,79 +2,101 @@
 #ifndef _ASM_S390_FUTEX_H
 #define _ASM_S390_FUTEX_H
 
+#include <linux/instrumented.h>
 #include <linux/uaccess.h>
 #include <linux/futex.h>
 #include <asm/asm-extable.h>
 #include <asm/mmu_context.h>
 #include <asm/errno.h>
 
-#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
-	asm volatile(							\
-		"   sacf  256\n"					\
-		"0: l     %1,0(%6)\n"					\
-		"1:"insn						\
-		"2: cs    %1,%2,0(%6)\n"				\
-		"3: jl    1b\n"						\
-		"   lhi   %0,0\n"					\
-		"4: sacf  768\n"					\
-		EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b)		\
-		: "=d" (ret), "=&d" (oldval), "=&d" (newval),		\
-		  "=m" (*uaddr)						\
-		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
-		  "m" (*uaddr) : "cc");
+#define FUTEX_OP_FUNC(name, insn)						\
+static uaccess_kmsan_or_inline int						\
+__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr)			\
+{										\
+	bool sacf_flag;								\
+	int rc, new;								\
+										\
+	instrument_copy_from_user_before(old, uaddr, sizeof(*old));		\
+	sacf_flag = enable_sacf_uaccess();					\
+	asm_inline volatile(							\
+		"	sacf	256\n"						\
+		"0:	l	%[old],%[uaddr]\n"				\
+		"1:"insn							\
+		"2:	cs	%[old],%[new],%[uaddr]\n"			\
+		"3:	jl	1b\n"						\
+		"	lhi	%[rc],0\n"					\
+		"4:	sacf	768\n"						\
+		EX_TABLE_UA_FAULT(0b, 4b, %[rc])				\
+		EX_TABLE_UA_FAULT(1b, 4b, %[rc])				\
+		EX_TABLE_UA_FAULT(2b, 4b, %[rc])				\
+		EX_TABLE_UA_FAULT(3b, 4b, %[rc])				\
+		: [rc] "=d" (rc), [old] "=&d" (*old),				\
+		  [new] "=&d" (new), [uaddr] "+Q" (*uaddr)			\
+		: [oparg] "d" (oparg)						\
+		: "cc");							\
+	disable_sacf_uaccess(sacf_flag);					\
+	if (!rc)								\
+		instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0);	\
+	return rc;								\
+}
+
+FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n")
+FUTEX_OP_FUNC(or,  "lr %[new],%[old]\n or %[new],%[oparg]\n")
+FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n")
+FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n")
 
-static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
-		u32 __user *uaddr)
+static inline
+int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
-	int oldval = 0, newval, ret;
+	int old, rc;
 
 	switch (op) {
 	case FUTEX_OP_SET:
-		__futex_atomic_op("lr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
+		rc = __futex_atomic_set(oparg, &old, uaddr);
 		break;
 	case FUTEX_OP_ADD:
-		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
+		rc = __futex_atomic_add(oparg, &old, uaddr);
 		break;
 	case FUTEX_OP_OR:
-		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
+		rc = __futex_atomic_or(oparg, &old, uaddr);
 		break;
 	case FUTEX_OP_ANDN:
-		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
+		rc = __futex_atomic_and(~oparg, &old, uaddr);
 		break;
 	case FUTEX_OP_XOR:
-		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
-				  ret, oldval, newval, uaddr, oparg);
+		rc = __futex_atomic_xor(oparg, &old, uaddr);
 		break;
 	default:
-		ret = -ENOSYS;
+		rc = -ENOSYS;
 	}
-
-	if (!ret)
-		*oval = oldval;
-
-	return ret;
+	if (!rc)
+		*oval = old;
+	return rc;
 }
 
-static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
-						u32 oldval, u32 newval)
+static uaccess_kmsan_or_inline
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval)
 {
-	int ret;
+	bool sacf_flag;
+	int rc;
 
-	asm volatile(
-		"   sacf 256\n"
-		"0: cs   %1,%4,0(%5)\n"
-		"1: la   %0,0\n"
-		"2: sacf 768\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
-		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
-		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+	instrument_copy_from_user_before(uval, uaddr, sizeof(*uval));
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile(
+		"	sacf	256\n"
+		"0:	cs	%[old],%[new],%[uaddr]\n"
+		"1:	lhi	%[rc],0\n"
+		"2:	sacf	768\n"
+		EX_TABLE_UA_FAULT(0b, 2b, %[rc])
+		EX_TABLE_UA_FAULT(1b, 2b, %[rc])
+		: [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr)
+		: [new] "d" (newval)
 		: "cc", "memory");
+	disable_sacf_uaccess(sacf_flag);
 	*uval = oldval;
-	return ret;
+	instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0);
+	return rc;
 }
 
 #endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 40264f60b0da..66c5808fd011 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -17,13 +17,12 @@
 #define GMAP_NOTIFY_MPROT	0x1
 
 /* Status bits only for huge segment entries */
-#define _SEGMENT_ENTRY_GMAP_IN		0x8000	/* invalidation notify bit */
-#define _SEGMENT_ENTRY_GMAP_UC		0x4000	/* dirty (migration) */
+#define _SEGMENT_ENTRY_GMAP_IN		0x0800	/* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC		0x0002	/* dirty (migration) */
 
 /**
  * struct gmap_struct - guest address space
  * @list: list head for the mm->context gmap list
- * @crst_list: list of all crst tables used in the guest address space
  * @mm: pointer to the parent mm_struct
  * @guest_to_host: radix tree with guest to host address translation
  * @host_to_guest: radix tree with pointer to segment table entries
@@ -35,7 +34,6 @@
  * @guest_handle: protected virtual machine handle for the ultravisor
  * @host_to_rmap: radix tree with gmap_rmap lists
  * @children: list of shadow gmap structures
- * @pt_list: list of all page tables used in the shadow guest address space
  * @shadow_lock: spinlock to protect the shadow gmap list
  * @parent: pointer to the parent gmap for shadow guest address spaces
  * @orig_asce: ASCE for which the shadow page table has been created
@@ -45,7 +43,6 @@
  */
 struct gmap {
 	struct list_head list;
-	struct list_head crst_list;
 	struct mm_struct *mm;
 	struct radix_tree_root guest_to_host;
 	struct radix_tree_root host_to_guest;
@@ -61,7 +58,6 @@ struct gmap {
 	/* Additional data for shadow guest address spaces */
 	struct radix_tree_root host_to_rmap;
 	struct list_head children;
-	struct list_head pt_list;
 	spinlock_t shadow_lock;
 	struct gmap *parent;
 	unsigned long orig_asce;
@@ -106,26 +102,20 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit);
 void gmap_remove(struct gmap *gmap);
 struct gmap *gmap_get(struct gmap *gmap);
 void gmap_put(struct gmap *gmap);
+void gmap_free(struct gmap *gmap);
+struct gmap *gmap_alloc(unsigned long limit);
 
-void gmap_enable(struct gmap *gmap);
-void gmap_disable(struct gmap *gmap);
-struct gmap *gmap_get_enabled(void);
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long len);
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
 unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
 int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
 void __gmap_zap(struct gmap *, unsigned long gaddr);
 void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
 
 int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val);
 
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
-			 int edat_level);
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+void gmap_unshadow(struct gmap *sg);
 int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		    int fake);
 int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
@@ -134,18 +124,51 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		    int fake);
 int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 		    int fake);
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
-			   unsigned long *pgt, int *dat_protection, int *fake);
 int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
 
 void gmap_register_pte_notifier(struct gmap_notifier *);
 void gmap_unregister_pte_notifier(struct gmap_notifier *);
 
-int gmap_mprotect_notify(struct gmap *, unsigned long start,
-			 unsigned long len, int prot);
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits);
 
 void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
 			     unsigned long gaddr, unsigned long vmaddr);
-int gmap_mark_unmergeable(void);
-void s390_reset_acc(struct mm_struct *mm);
+int s390_replace_asce(struct gmap *gmap);
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool interruptible);
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level);
+
+/**
+ * s390_uv_destroy_range - Destroy a range of pages in the given mm.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_resched(), so it should not generate stalls,
+ * but it will otherwise only return once it has completed.
+ */
+static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+					 unsigned long end)
+{
+	(void)__s390_uv_destroy_range(mm, start, end, false);
+}
+
+/**
+ * s390_uv_destroy_range_interruptible - Destroy a range of pages in the
+ * given mm, but stop when a fatal signal is received.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_resched(), so it should not generate stalls. If
+ * a fatal signal is received, it will return with -EINTR immediately,
+ * without finishing destroying the whole range. Upon successful
+ * completion, 0 is returned.
+ */
+static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
+						      unsigned long end)
+{
+	return __s390_uv_destroy_range(mm, start, end, true);
+}
 #endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..5356446a61c4
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 58668ffb5488..a5b45388c91f 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -13,9 +13,9 @@
 
 #include <asm/lowcore.h>
 
-#define local_softirq_pending() (S390_lowcore.softirq_pending)
-#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
-#define or_softirq_pending(x)  (S390_lowcore.softirq_pending |= (x))
+#define local_softirq_pending() (get_lowcore()->softirq_pending)
+#define set_softirq_pending(x) (get_lowcore()->softirq_pending = (x))
+#define or_softirq_pending(x)  (get_lowcore()->softirq_pending |= (x))
 
 #define __ARCH_IRQ_STAT
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/arch/s390/include/asm/hiperdispatch.h b/arch/s390/include/asm/hiperdispatch.h
new file mode 100644
index 000000000000..27e23aa27a24
--- /dev/null
+++ b/arch/s390/include/asm/hiperdispatch.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef _ASM_HIPERDISPATCH_H
+#define _ASM_HIPERDISPATCH_H
+
+void hd_reset_state(void);
+void hd_add_core(int cpu);
+void hd_disable_hiperdispatch(void);
+int hd_enable_hiperdispatch(void);
+
+#endif /* _ASM_HIPERDISPATCH_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index f22beda9e6d5..931fcc413598 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -9,38 +9,41 @@
 #ifndef _ASM_S390_HUGETLB_H
 #define _ASM_S390_HUGETLB_H
 
+#include <linux/cpufeature.h>
 #include <linux/pgtable.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 #include <asm/page.h>
 
-#define hugetlb_free_pgd_range			free_pgd_range
-#define hugepages_supported()			(MACHINE_HAS_EDAT1)
+#define hugepages_supported()	cpu_has_edat1()
 
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-		     pte_t *ptep, pte_t pte);
-pte_t huge_ptep_get(pte_t *ptep);
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-			      unsigned long addr, pte_t *ptep);
+		     pte_t *ptep, pte_t pte, unsigned long sz);
+void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		       pte_t *ptep, pte_t pte);
 
-/*
- * If the arch doesn't supply something else, assume that hugepage
- * size aligned regions are ok without further preparation.
- */
-static inline int prepare_hugepage_range(struct file *file,
-			unsigned long addr, unsigned long len)
+#define __HAVE_ARCH_HUGE_PTEP_GET
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep,
+					    unsigned long sz)
 {
-	if (len & ~HPAGE_MASK)
-		return -EINVAL;
-	if (addr & ~HPAGE_MASK)
-		return -EINVAL;
-	return 0;
+	return __huge_ptep_get_and_clear(mm, addr, ptep);
 }
 
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
 {
-	clear_bit(PG_arch_1, &page->flags);
+	clear_bit(PG_arch_1, &folio->flags);
 }
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
 
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 				  pte_t *ptep, unsigned long sz)
 {
@@ -50,94 +53,54 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 		set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY));
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
 static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
 					  unsigned long address, pte_t *ptep)
 {
-	return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+	return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
 }
 
+#define  __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 					     unsigned long addr, pte_t *ptep,
 					     pte_t pte, int dirty)
 {
-	int changed = !pte_same(huge_ptep_get(ptep), pte);
+	int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
+
 	if (changed) {
-		huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
-		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+		__huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+		__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
 	}
 	return changed;
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
-	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
-}
-
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
-{
-	return mk_pte(page, pgprot);
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
-	return pte_none(pte);
-}
-
-static inline int huge_pte_none_mostly(pte_t pte)
-{
-	return huge_pte_none(pte);
-}
-
-static inline int huge_pte_write(pte_t pte)
-{
-	return pte_write(pte);
-}
-
-static inline int huge_pte_dirty(pte_t pte)
-{
-	return pte_dirty(pte);
-}
+	pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep);
 
-static inline pte_t huge_pte_mkwrite(pte_t pte)
-{
-	return pte_mkwrite(pte);
-}
-
-static inline pte_t huge_pte_mkdirty(pte_t pte)
-{
-	return pte_mkdirty(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
-	return pte_wrprotect(pte);
-}
-
-static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
-{
-	return pte_modify(pte, newprot);
+	__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 
+#define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP
 static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
 {
 	return pte;
 }
 
+#define __HAVE_ARCH_HUGE_PTE_CLEAR_UFFD_WP
 static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
 {
 	return pte;
 }
 
+#define __HAVE_ARCH_HUGE_PTE_UFFD_WP
 static inline int huge_pte_uffd_wp(pte_t pte)
 {
 	return 0;
 }
 
-static inline bool gigantic_page_runtime_supported(void)
-{
-	return true;
-}
+#include <asm-generic/hugetlb.h>
 
 #endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 40eae2c08d61..ac68c657b28c 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* 
+/*
  * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
  *		    Martin Schwidefsky <schwidefsky@de.ibm.com>
  * Bugreports.to..: <Linux390@de.ibm.com>
@@ -17,47 +17,65 @@
 #include <linux/err.h>
 #include <linux/types.h>
 #include <linux/slab.h>
-#include <asm/cio.h>
 #include <linux/uaccess.h>
+#include <asm/dma-types.h>
+#include <asm/cio.h>
+
+#define IDA_SIZE_SHIFT		12
+#define IDA_BLOCK_SIZE		(1UL << IDA_SIZE_SHIFT)
 
-#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
-#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+#define IDA_2K_SIZE_SHIFT	11
+#define IDA_2K_BLOCK_SIZE	(1UL << IDA_2K_SIZE_SHIFT)
 
 /*
  * Test if an address/length pair needs an idal list.
  */
-static inline int
-idal_is_needed(void *vaddr, unsigned int length)
+static inline bool idal_is_needed(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) + length - 1) >> 31) != 0;
-}
+	dma64_t paddr = virt_to_dma64(vaddr);
 
+	return (((__force unsigned long)(paddr) + length - 1) >> 31) != 0;
+}
 
 /*
  * Return the number of idal words needed for an address/length pair.
  */
 static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
-		(IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+	unsigned int cidaw;
+
+	cidaw = (unsigned long)vaddr & (IDA_BLOCK_SIZE - 1);
+	cidaw += length + IDA_BLOCK_SIZE - 1;
+	cidaw >>= IDA_SIZE_SHIFT;
+	return cidaw;
+}
+
+/*
+ * Return the number of 2K IDA words needed for an address/length pair.
+ */
+static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
+{
+	unsigned int cidaw;
+
+	cidaw = (unsigned long)vaddr & (IDA_2K_BLOCK_SIZE - 1);
+	cidaw += length + IDA_2K_BLOCK_SIZE - 1;
+	cidaw >>= IDA_2K_SIZE_SHIFT;
+	return cidaw;
 }
 
 /*
  * Create the list of idal words for an address/length pair.
  */
-static inline unsigned long *idal_create_words(unsigned long *idaws,
-					       void *vaddr, unsigned int length)
+static inline dma64_t *idal_create_words(dma64_t *idaws, void *vaddr, unsigned int length)
 {
-	unsigned long paddr;
+	dma64_t paddr = virt_to_dma64(vaddr);
 	unsigned int cidaw;
 
-	paddr = __pa(vaddr);
-	cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length + 
-		 (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
 	*idaws++ = paddr;
-	paddr &= -IDA_BLOCK_SIZE;
+	cidaw = idal_nr_words(vaddr, length);
+	paddr = dma64_and(paddr, -IDA_BLOCK_SIZE);
 	while (--cidaw > 0) {
-		paddr += IDA_BLOCK_SIZE;
+		paddr = dma64_add(paddr, IDA_BLOCK_SIZE);
 		*idaws++ = paddr;
 	}
 	return idaws;
@@ -67,36 +85,33 @@ static inline unsigned long *idal_create_words(unsigned long *idaws,
  * Sets the address of the data in CCW.
  * If necessary it allocates an IDAL and sets the appropriate flags.
  */
-static inline int
-set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr)
 {
 	unsigned int nridaws;
-	unsigned long *idal;
+	dma64_t *idal;
 
 	if (ccw->flags & CCW_FLAG_IDA)
 		return -EINVAL;
 	nridaws = idal_nr_words(vaddr, ccw->count);
 	if (nridaws > 0) {
-		idal = kmalloc(nridaws * sizeof(unsigned long),
-			       GFP_ATOMIC | GFP_DMA );
-		if (idal == NULL)
+		idal = kcalloc(nridaws, sizeof(*idal), GFP_ATOMIC | GFP_DMA);
+		if (!idal)
 			return -ENOMEM;
 		idal_create_words(idal, vaddr, ccw->count);
 		ccw->flags |= CCW_FLAG_IDA;
 		vaddr = idal;
 	}
-	ccw->cda = (__u32)(unsigned long) vaddr;
+	ccw->cda = virt_to_dma32(vaddr);
 	return 0;
 }
 
 /*
  * Releases any allocated IDAL related to the CCW.
  */
-static inline void
-clear_normalized_cda(struct ccw1 * ccw)
+static inline void clear_normalized_cda(struct ccw1 *ccw)
 {
 	if (ccw->flags & CCW_FLAG_IDA) {
-		kfree((void *)(unsigned long) ccw->cda);
+		kfree(dma32_to_virt(ccw->cda));
 		ccw->flags &= ~CCW_FLAG_IDA;
 	}
 	ccw->cda = 0;
@@ -108,125 +123,138 @@ clear_normalized_cda(struct ccw1 * ccw)
 struct idal_buffer {
 	size_t size;
 	size_t page_order;
-	void *data[];
+	dma64_t data[];
 };
 
 /*
  * Allocate an idal buffer
  */
-static inline struct idal_buffer *
-idal_buffer_alloc(size_t size, int page_order)
+static inline struct idal_buffer *idal_buffer_alloc(size_t size, int page_order)
 {
-	struct idal_buffer *ib;
 	int nr_chunks, nr_ptrs, i;
+	struct idal_buffer *ib;
+	void *vaddr;
 
-	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
-	nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
+	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
+	nr_chunks = (PAGE_SIZE << page_order) >> IDA_SIZE_SHIFT;
 	ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
-	if (ib == NULL)
+	if (!ib)
 		return ERR_PTR(-ENOMEM);
 	ib->size = size;
 	ib->page_order = page_order;
 	for (i = 0; i < nr_ptrs; i++) {
-		if ((i & (nr_chunks - 1)) != 0) {
-			ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
-			continue;
-		}
-		ib->data[i] = (void *)
-			__get_free_pages(GFP_KERNEL, page_order);
-		if (ib->data[i] != NULL)
+		if (i & (nr_chunks - 1)) {
+			ib->data[i] = dma64_add(ib->data[i - 1], IDA_BLOCK_SIZE);
 			continue;
-		// Not enough memory
-		while (i >= nr_chunks) {
-			i -= nr_chunks;
-			free_pages((unsigned long) ib->data[i],
-				   ib->page_order);
 		}
-		kfree(ib);
-		return ERR_PTR(-ENOMEM);
+		vaddr = (void *)__get_free_pages(GFP_KERNEL, page_order);
+		if (!vaddr)
+			goto error;
+		ib->data[i] = virt_to_dma64(vaddr);
 	}
 	return ib;
+error:
+	while (i >= nr_chunks) {
+		i -= nr_chunks;
+		vaddr = dma64_to_virt(ib->data[i]);
+		free_pages((unsigned long)vaddr, ib->page_order);
+	}
+	kfree(ib);
+	return ERR_PTR(-ENOMEM);
 }
 
 /*
  * Free an idal buffer.
  */
-static inline void
-idal_buffer_free(struct idal_buffer *ib)
+static inline void idal_buffer_free(struct idal_buffer *ib)
 {
 	int nr_chunks, nr_ptrs, i;
+	void *vaddr;
 
-	nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
-	nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG;
-	for (i = 0; i < nr_ptrs; i += nr_chunks)
-		free_pages((unsigned long) ib->data[i], ib->page_order);
+	nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
+	nr_chunks = (PAGE_SIZE << ib->page_order) >> IDA_SIZE_SHIFT;
+	for (i = 0; i < nr_ptrs; i += nr_chunks) {
+		vaddr = dma64_to_virt(ib->data[i]);
+		free_pages((unsigned long)vaddr, ib->page_order);
+	}
 	kfree(ib);
 }
 
 /*
  * Test if a idal list is really needed.
  */
-static inline int
-__idal_buffer_is_needed(struct idal_buffer *ib)
+static inline bool __idal_buffer_is_needed(struct idal_buffer *ib)
 {
-	return ib->size > (4096ul << ib->page_order) ||
-		idal_is_needed(ib->data[0], ib->size);
+	if (ib->size > (PAGE_SIZE << ib->page_order))
+		return true;
+	return idal_is_needed(dma64_to_virt(ib->data[0]), ib->size);
 }
 
 /*
  * Set channel data address to idal buffer.
  */
-static inline void
-idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
+static inline void idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
 {
+	void *vaddr;
+
 	if (__idal_buffer_is_needed(ib)) {
-		// setup idals;
-		ccw->cda = (u32)(addr_t) ib->data;
+		/* Setup idals */
+		ccw->cda = virt_to_dma32(ib->data);
 		ccw->flags |= CCW_FLAG_IDA;
-	} else
-		// we do not need idals - use direct addressing
-		ccw->cda = (u32)(addr_t) ib->data[0];
+	} else {
+		/*
+		 * No idals needed - use direct addressing. Convert from
+		 * dma64_t to virt and then to dma32_t only because of type
+		 * checking. The physical address is known to be below 2GB.
+		 */
+		vaddr = dma64_to_virt(ib->data[0]);
+		ccw->cda = virt_to_dma32(vaddr);
+	}
 	ccw->count = ib->size;
 }
 
 /*
  * Copy count bytes from an idal buffer to user memory
  */
-static inline size_t
-idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
+static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
 {
 	size_t left;
+	void *vaddr;
 	int i;
 
 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
-		left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE);
+		vaddr = dma64_to_virt(ib->data[i]);
+		left = copy_to_user(to, vaddr, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
-		to = (void __user *) to + IDA_BLOCK_SIZE;
+		to = (void __user *)to + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
-	return copy_to_user(to, ib->data[i], count);
+	vaddr = dma64_to_virt(ib->data[i]);
+	return copy_to_user(to, vaddr, count);
 }
 
 /*
  * Copy count bytes from user memory to an idal buffer
  */
-static inline size_t
-idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
+static inline size_t idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
 {
 	size_t left;
+	void *vaddr;
 	int i;
 
 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
-		left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE);
+		vaddr = dma64_to_virt(ib->data[i]);
+		left = copy_from_user(vaddr, from, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
-		from = (void __user *) from + IDA_BLOCK_SIZE;
+		from = (void __user *)from + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
-	return copy_from_user(ib->data[i], from, count);
+	vaddr = dma64_to_virt(ib->data[i]);
+	return copy_from_user(vaddr, from, count);
 }
 
 #endif
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 5cea629c548e..09f763b9eb40 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -10,16 +10,12 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
-#include <linux/seqlock.h>
 
 struct s390_idle_data {
-	seqcount_t seqcount;
 	unsigned long idle_count;
 	unsigned long idle_time;
 	unsigned long clock_idle_enter;
-	unsigned long clock_idle_exit;
 	unsigned long timer_idle_enter;
-	unsigned long timer_idle_exit;
 	unsigned long mt_cycles_enter[8];
 };
 
@@ -27,6 +23,5 @@ extern struct device_attribute dev_attr_idle_count;
 extern struct device_attribute dev_attr_idle_time_us;
 
 void psw_idle(struct s390_idle_data *data, unsigned long psw_mask);
-void psw_idle_exit(void);
 
 #endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index e3882b012bfa..faddb9aef3b8 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -16,17 +16,24 @@
 #include <asm/pci_io.h>
 
 #define xlate_dev_mem_ptr xlate_dev_mem_ptr
+#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr
 void *xlate_dev_mem_ptr(phys_addr_t phys);
 #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
 
 #define IO_SPACE_LIMIT 0
 
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
-void __iomem *ioremap(phys_addr_t addr, size_t size);
-void __iomem *ioremap_wc(phys_addr_t addr, size_t size);
-void __iomem *ioremap_wt(phys_addr_t addr, size_t size);
-void iounmap(volatile void __iomem *addr);
+/*
+ * I/O memory mapping functions.
+ */
+#define ioremap_prot ioremap_prot
+#define iounmap iounmap
+
+#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL)
+
+#define ioremap_wc(addr, size)  \
+	ioremap_prot((addr), (size), pgprot_writecombine(PAGE_KERNEL))
 
 static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
 {
@@ -51,10 +58,6 @@ static inline void ioport_unmap(void __iomem *p)
 #define pci_iomap_wc pci_iomap_wc
 #define pci_iomap_wc_range pci_iomap_wc_range
 
-#define ioremap ioremap
-#define ioremap_wt ioremap_wt
-#define ioremap_wc ioremap_wc
-
 #define memcpy_fromio(dst, src, count)	zpci_memcpy_fromio(dst, src, count)
 #define memcpy_toio(dst, src, count)	zpci_memcpy_toio(dst, src, count)
 #define memset_io(dst, val, count)	zpci_memset_io(dst, val, count)
@@ -70,6 +73,21 @@ static inline void ioport_unmap(void __iomem *p)
 #define __raw_writel	zpci_write_u32
 #define __raw_writeq	zpci_write_u64
 
+/* combine single writes by using store-block insn */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+				    size_t count)
+{
+	zpci_memcpy_toio(to, from, count * 4);
+}
+#define __iowrite32_copy __iowrite32_copy
+
+static inline void __iowrite64_copy(void __iomem *to, const void *from,
+				    size_t count)
+{
+	zpci_memcpy_toio(to, from, count * 8);
+}
+#define __iowrite64_copy __iowrite64_copy
+
 #endif /* CONFIG_PCI */
 
 #include <asm-generic/io.h>
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index a405b6bb89fb..b0d00032479d 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -22,6 +22,7 @@ struct ipl_parameter_block {
 		struct ipl_pb0_common common;
 		struct ipl_pb0_fcp fcp;
 		struct ipl_pb0_ccw ccw;
+		struct ipl_pb0_eckd eckd;
 		struct ipl_pb0_nvme nvme;
 		char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
 	};
@@ -41,6 +42,10 @@ struct ipl_parameter_block {
 			      sizeof(struct ipl_pb0_ccw))
 #define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
 
+#define IPL_BP_ECKD_LEN (sizeof(struct ipl_pl_hdr) + \
+			      sizeof(struct ipl_pb0_eckd))
+#define IPL_BP0_ECKD_LEN (sizeof(struct ipl_pb0_eckd))
+
 #define IPL_MAX_SUPPORTED_VERSION (0)
 
 #define IPL_RB_CERT_UNKNOWN ((unsigned short)-1)
@@ -68,6 +73,8 @@ enum ipl_type {
 	IPL_TYPE_NSS		= 16,
 	IPL_TYPE_NVME		= 32,
 	IPL_TYPE_NVME_DUMP	= 64,
+	IPL_TYPE_ECKD		= 128,
+	IPL_TYPE_ECKD_DUMP	= 256,
 };
 
 struct ipl_info
@@ -79,6 +86,9 @@ struct ipl_info
 		} ccw;
 		struct {
 			struct ccw_dev_id dev_id;
+		} eckd;
+		struct {
+			struct ccw_dev_id dev_id;
 			u64 wwpn;
 			u64 lun;
 		} fcp;
@@ -99,6 +109,7 @@ extern void set_os_info_reipl_block(void);
 static inline bool is_ipl_type_dump(void)
 {
 	return (ipl_info.type == IPL_TYPE_FCP_DUMP) ||
+		(ipl_info.type == IPL_TYPE_ECKD_DUMP) ||
 		(ipl_info.type == IPL_TYPE_NVME_DUMP);
 }
 
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 89902f754740..bde6a496df5f 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -31,6 +31,7 @@
 #include <linux/percpu.h>
 #include <linux/cache.h>
 #include <linux/types.h>
+#include <asm/ctlreg.h>
 
 enum interruption_class {
 	IRQEXT_CLK,
@@ -46,13 +47,13 @@ enum interruption_class {
 	IRQEXT_CMS,
 	IRQEXT_CMC,
 	IRQEXT_FTP,
+	IRQEXT_WTI,
 	IRQIO_CIO,
 	IRQIO_DAS,
 	IRQIO_C15,
 	IRQIO_C70,
 	IRQIO_TAP,
 	IRQIO_VMR,
-	IRQIO_LCS,
 	IRQIO_CTC,
 	IRQIO_ADM,
 	IRQIO_CSC,
@@ -98,20 +99,21 @@ int unregister_external_irq(u16 code, ext_int_handler_t handler);
 enum irq_subclass {
 	IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
 	IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+	IRQ_SUBCLASS_WARNING_TRACK = 33,
 };
 
 #define CR0_IRQ_SUBCLASS_MASK					  \
-	((1UL << (63 - 30))  /* Warning Track */		| \
-	 (1UL << (63 - 48))  /* Malfunction Alert */		| \
-	 (1UL << (63 - 49))  /* Emergency Signal */		| \
-	 (1UL << (63 - 50))  /* External Call */		| \
-	 (1UL << (63 - 52))  /* Clock Comparator */		| \
-	 (1UL << (63 - 53))  /* CPU Timer */			| \
-	 (1UL << (63 - 54))  /* Service Signal */		| \
-	 (1UL << (63 - 57))  /* Interrupt Key */		| \
-	 (1UL << (63 - 58))  /* Measurement Alert */		| \
-	 (1UL << (63 - 59))  /* Timing Alert */			| \
-	 (1UL << (63 - 62))) /* IUCV */
+	(CR0_WARNING_TRACK					| \
+	 CR0_MALFUNCTION_ALERT_SUBMASK				| \
+	 CR0_EMERGENCY_SIGNAL_SUBMASK				| \
+	 CR0_EXTERNAL_CALL_SUBMASK				| \
+	 CR0_CLOCK_COMPARATOR_SUBMASK				| \
+	 CR0_CPU_TIMER_SUBMASK					| \
+	 CR0_SERVICE_SIGNAL_SUBMASK				| \
+	 CR0_INTERRUPT_KEY_SUBMASK				| \
+	 CR0_MEASUREMENT_ALERT_SUBMASK				| \
+	 CR0_ETR_SUBMASK					| \
+	 CR0_IUCV)
 
 void irq_subclass_register(enum irq_subclass subclass);
 void irq_subclass_unregister(enum irq_subclass subclass);
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
index 603783766d0a..f00c9f610d5a 100644
--- a/arch/s390/include/asm/irq_work.h
+++ b/arch/s390/include/asm/irq_work.h
@@ -7,6 +7,4 @@ static inline bool arch_irq_work_has_interrupt(void)
 	return true;
 }
 
-void arch_irq_work_raise(void);
-
 #endif /* _ASM_S390_IRQ_WORK_H */
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 02427b205c11..bcab456dfb80 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -37,12 +37,18 @@ static __always_inline void __arch_local_irq_ssm(unsigned long flags)
 	asm volatile("ssm   %0" : : "Q" (flags) : "memory");
 }
 
-static __always_inline unsigned long arch_local_save_flags(void)
+#ifdef CONFIG_KMSAN
+#define arch_local_irq_attributes noinline notrace __no_sanitize_memory __maybe_unused
+#else
+#define arch_local_irq_attributes __always_inline
+#endif
+
+static arch_local_irq_attributes unsigned long arch_local_save_flags(void)
 {
 	return __arch_local_irq_stnsm(0xff);
 }
 
-static __always_inline unsigned long arch_local_irq_save(void)
+static arch_local_irq_attributes unsigned long arch_local_irq_save(void)
 {
 	return __arch_local_irq_stnsm(0xfc);
 }
@@ -52,7 +58,12 @@ static __always_inline void arch_local_irq_disable(void)
 	arch_local_irq_save();
 }
 
-static __always_inline void arch_local_irq_enable(void)
+static arch_local_irq_attributes void arch_local_irq_enable_external(void)
+{
+	__arch_local_irq_stosm(0x01);
+}
+
+static arch_local_irq_attributes void arch_local_irq_enable(void)
 {
 	__arch_local_irq_stosm(0x03);
 }
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 895f774bbcc5..bf78cf381dfc 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -25,7 +25,7 @@
  */
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-	asm_volatile_goto("0:	brcl 0,%l[label]\n"
+	asm goto("0:	brcl 0,%l[label]\n"
 			  ".pushsection __jump_table,\"aw\"\n"
 			  ".balign	8\n"
 			  ".long	0b-.,%l[label]-.\n"
@@ -39,7 +39,7 @@ label:
 
 static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
-	asm_volatile_goto("0:	brcl 15,%l[label]\n"
+	asm goto("0:	brcl 15,%l[label]\n"
 			  ".pushsection __jump_table,\"aw\"\n"
 			  ".balign	8\n"
 			  ".long	0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 2768d5db181f..0cffead0f2f2 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -2,7 +2,7 @@
 #ifndef __ASM_KASAN_H
 #define __ASM_KASAN_H
 
-#include <asm/pgtable.h>
+#include <linux/const.h>
 
 #ifdef CONFIG_KASAN
 
@@ -13,39 +13,6 @@
 #define KASAN_SHADOW_START	KASAN_SHADOW_OFFSET
 #define KASAN_SHADOW_END	(KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
 
-extern void kasan_early_init(void);
-extern void kasan_copy_shadow_mapping(void);
-extern void kasan_free_early_identity(void);
-
-/*
- * Estimate kasan memory requirements, which it will reserve
- * at the very end of available physical memory. To estimate
- * that, we take into account that kasan would require
- * 1/8 of available physical memory (for shadow memory) +
- * creating page tables for the whole memory + shadow memory
- * region (1 + 1/8). To keep page tables estimates simple take
- * the double of combined ptes size.
- *
- * physmem parameter has to be already adjusted if not entire physical memory
- * would be used (e.g. due to effect of "mem=" option).
- */
-static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem)
-{
-	unsigned long kasan_needs;
-	unsigned long pages;
-	/* for shadow memory */
-	kasan_needs = round_up(physmem / 8, PAGE_SIZE);
-	/* for paging structures */
-	pages = DIV_ROUND_UP(physmem + kasan_needs, PAGE_SIZE);
-	kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
-
-	return kasan_needs;
-}
-#else
-static inline void kasan_early_init(void) { }
-static inline void kasan_copy_shadow_mapping(void) { }
-static inline void kasan_free_early_identity(void) { }
-static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; }
 #endif
 
 #endif
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 1bd08eb56d5f..9084b750350d 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -94,6 +94,9 @@ void arch_kexec_protect_crashkres(void);
 
 void arch_kexec_unprotect_crashkres(void);
 #define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+
+bool is_kdump_kernel(void);
+#define is_kdump_kernel is_kdump_kernel
 #endif
 
 #ifdef CONFIG_KEXEC_FILE
diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h
index d55ba878378b..e95e35eb8a3f 100644
--- a/arch/s390/include/asm/kfence.h
+++ b/arch/s390/include/asm/kfence.h
@@ -12,30 +12,19 @@ void __kernel_map_pages(struct page *page, int numpages, int enable);
 
 static __always_inline bool arch_kfence_init_pool(void)
 {
-	return true;
-}
-
-#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
-
-/*
- * Do not split kfence pool to 4k mapping with arch_kfence_init_pool(),
- * but earlier where page table allocations still happen with memblock.
- * Reason is that arch_kfence_init_pool() gets called when the system
- * is still in a limbo state - disabling and enabling bottom halves is
- * not yet allowed, but that is what our page_table_alloc() would do.
- */
-static __always_inline void kfence_split_mapping(void)
-{
 #ifdef CONFIG_KFENCE
 	unsigned long pool_pages = KFENCE_POOL_SIZE >> PAGE_SHIFT;
 
 	set_memory_4k((unsigned long)__kfence_pool, pool_pages);
 #endif
+	return true;
 }
 
+#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
+
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
 {
-	__kernel_map_pages(virt_to_page(addr), 1, !protect);
+	__kernel_map_pages(virt_to_page((void *)addr), 1, !protect);
 	return true;
 }
 
diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h
new file mode 100644
index 000000000000..f73e181d09ae
--- /dev/null
+++ b/arch/s390/include/asm/kmsan.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_KMSAN_H
+#define _ASM_S390_KMSAN_H
+
+#include <asm/lowcore.h>
+#include <asm/page.h>
+#include <linux/kmsan.h>
+#include <linux/mmzone.h>
+#include <linux/stddef.h>
+
+#ifndef MODULE
+
+static inline bool is_lowcore_addr(void *addr)
+{
+	return addr >= (void *)get_lowcore() &&
+	       addr < (void *)(get_lowcore() + 1);
+}
+
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+	if (is_lowcore_addr(addr)) {
+		/*
+		 * Different lowcores accessed via get_lowcore() are described
+		 * by the same struct page. Resolve the prefix manually in
+		 * order to get a distinct struct page.
+		 */
+		addr += (void *)lowcore_ptr[raw_smp_processor_id()] -
+			(void *)get_lowcore();
+		if (KMSAN_WARN_ON(is_lowcore_addr(addr)))
+			return NULL;
+		return kmsan_get_metadata(addr, is_origin);
+	}
+	return NULL;
+}
+
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+	bool ret;
+
+	/*
+	 * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+	 * the critical section. However, this would result in recursion with
+	 * KMSAN. Therefore, disable preemption here, and re-enable preemption
+	 * below while suppressing reschedules to avoid recursion.
+	 *
+	 * Note: this may occasionally break scheduling guarantees. However,
+	 * a kernel compiled with KMSAN has already given up on any
+	 * performance guarantees due to being heavily instrumented.
+	 */
+	preempt_disable();
+	ret = virt_addr_valid(addr);
+	preempt_enable_no_resched();
+
+	return ret;
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_S390_KMSAN_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 598095f4b924..01f1682a73b7 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -15,6 +15,7 @@
  *		<grundym@us.ibm.com>
  */
 #include <linux/types.h>
+#include <asm/ctlreg.h>
 #include <asm-generic/kprobes.h>
 
 #define BREAKPOINT_INSTRUCTION	0x0002
@@ -65,17 +66,13 @@ struct prev_kprobe {
 struct kprobe_ctlblk {
 	unsigned long kprobe_status;
 	unsigned long kprobe_saved_imask;
-	unsigned long kprobe_saved_ctl[3];
+	struct ctlreg kprobe_saved_ctl[3];
 	struct prev_kprobe prev_kprobe;
 };
 
 void arch_remove_kprobe(struct kprobe *p);
-void __kretprobe_trampoline(void);
-void trampoline_probe_handler(struct pt_regs *regs);
 
 int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
-int kprobe_exceptions_notify(struct notifier_block *self,
-	unsigned long val, void *data);
 
 #define flush_insn_slot(p)	do { } while (0)
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 766028d54a3e..cb89e54ada25 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -15,20 +15,22 @@
 #include <linux/hrtimer.h>
 #include <linux/interrupt.h>
 #include <linux/kvm_types.h>
-#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/seqlock.h>
 #include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/mmu_notifier.h>
+#include <asm/kvm_host_types.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
 #include <asm/isc.h>
 #include <asm/guarded_storage.h>
 
-#define KVM_S390_BSCA_CPU_SLOTS 64
-#define KVM_S390_ESCA_CPU_SLOTS 248
 #define KVM_MAX_VCPUS 255
 
+#define KVM_INTERNAL_MEM_SLOTS 1
+
 /*
  * These seem to be used for allocating ->chip in the routing table, which we
  * don't use. 1 is as small as we can get to reduce the needed memory. If we
@@ -48,321 +50,6 @@
 #define KVM_REQ_REFRESH_GUEST_PREFIX	\
 	KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
-#define SIGP_CTRL_C		0x80
-#define SIGP_CTRL_SCN_MASK	0x3f
-
-union bsca_sigp_ctrl {
-	__u8 value;
-	struct {
-		__u8 c : 1;
-		__u8 r : 1;
-		__u8 scn : 6;
-	};
-};
-
-union esca_sigp_ctrl {
-	__u16 value;
-	struct {
-		__u8 c : 1;
-		__u8 reserved: 7;
-		__u8 scn;
-	};
-};
-
-struct esca_entry {
-	union esca_sigp_ctrl sigp_ctrl;
-	__u16   reserved1[3];
-	__u64   sda;
-	__u64   reserved2[6];
-};
-
-struct bsca_entry {
-	__u8	reserved0;
-	union bsca_sigp_ctrl	sigp_ctrl;
-	__u16	reserved[3];
-	__u64	sda;
-	__u64	reserved2[2];
-};
-
-union ipte_control {
-	unsigned long val;
-	struct {
-		unsigned long k  : 1;
-		unsigned long kh : 31;
-		unsigned long kg : 32;
-	};
-};
-
-struct bsca_block {
-	union ipte_control ipte_control;
-	__u64	reserved[5];
-	__u64	mcn;
-	__u64	reserved2;
-	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
-};
-
-struct esca_block {
-	union ipte_control ipte_control;
-	__u64   reserved1[7];
-	__u64   mcn[4];
-	__u64   reserved2[20];
-	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-};
-
-/*
- * This struct is used to store some machine check info from lowcore
- * for machine checks that happen while the guest is running.
- * This info in host's lowcore might be overwritten by a second machine
- * check from host when host is in the machine check's high-level handling.
- * The size is 24 bytes.
- */
-struct mcck_volatile_info {
-	__u64 mcic;
-	__u64 failing_storage_address;
-	__u32 ext_damage_code;
-	__u32 reserved;
-};
-
-#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
-			  CR0_MEASUREMENT_ALERT_SUBMASK)
-#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
-			   CR14_EXTERNAL_DAMAGE_SUBMASK)
-
-#define SIDAD_SIZE_MASK		0xff
-#define sida_origin(sie_block) \
-	((sie_block)->sidad & PAGE_MASK)
-#define sida_size(sie_block) \
-	((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
-
-#define CPUSTAT_STOPPED    0x80000000
-#define CPUSTAT_WAIT       0x10000000
-#define CPUSTAT_ECALL_PEND 0x08000000
-#define CPUSTAT_STOP_INT   0x04000000
-#define CPUSTAT_IO_INT     0x02000000
-#define CPUSTAT_EXT_INT    0x01000000
-#define CPUSTAT_RUNNING    0x00800000
-#define CPUSTAT_RETAINED   0x00400000
-#define CPUSTAT_TIMING_SUB 0x00020000
-#define CPUSTAT_SIE_SUB    0x00010000
-#define CPUSTAT_RRF        0x00008000
-#define CPUSTAT_SLSV       0x00004000
-#define CPUSTAT_SLSR       0x00002000
-#define CPUSTAT_ZARCH      0x00000800
-#define CPUSTAT_MCDS       0x00000100
-#define CPUSTAT_KSS        0x00000200
-#define CPUSTAT_SM         0x00000080
-#define CPUSTAT_IBS        0x00000040
-#define CPUSTAT_GED2       0x00000010
-#define CPUSTAT_G          0x00000008
-#define CPUSTAT_GED        0x00000004
-#define CPUSTAT_J          0x00000002
-#define CPUSTAT_P          0x00000001
-
-struct kvm_s390_sie_block {
-	atomic_t cpuflags;		/* 0x0000 */
-	__u32 : 1;			/* 0x0004 */
-	__u32 prefix : 18;
-	__u32 : 1;
-	__u32 ibc : 12;
-	__u8	reserved08[4];		/* 0x0008 */
-#define PROG_IN_SIE (1<<0)
-	__u32	prog0c;			/* 0x000c */
-	union {
-		__u8	reserved10[16];		/* 0x0010 */
-		struct {
-			__u64	pv_handle_cpu;
-			__u64	pv_handle_config;
-		};
-	};
-#define PROG_BLOCK_SIE	(1<<0)
-#define PROG_REQUEST	(1<<1)
-	atomic_t prog20;		/* 0x0020 */
-	__u8	reserved24[4];		/* 0x0024 */
-	__u64	cputm;			/* 0x0028 */
-	__u64	ckc;			/* 0x0030 */
-	__u64	epoch;			/* 0x0038 */
-	__u32	svcc;			/* 0x0040 */
-#define LCTL_CR0	0x8000
-#define LCTL_CR6	0x0200
-#define LCTL_CR9	0x0040
-#define LCTL_CR10	0x0020
-#define LCTL_CR11	0x0010
-#define LCTL_CR14	0x0002
-	__u16   lctl;			/* 0x0044 */
-	__s16	icpua;			/* 0x0046 */
-#define ICTL_OPEREXC	0x80000000
-#define ICTL_PINT	0x20000000
-#define ICTL_LPSW	0x00400000
-#define ICTL_STCTL	0x00040000
-#define ICTL_ISKE	0x00004000
-#define ICTL_SSKE	0x00002000
-#define ICTL_RRBE	0x00001000
-#define ICTL_TPROT	0x00000200
-	__u32	ictl;			/* 0x0048 */
-#define ECA_CEI		0x80000000
-#define ECA_IB		0x40000000
-#define ECA_SIGPI	0x10000000
-#define ECA_MVPGI	0x01000000
-#define ECA_AIV		0x00200000
-#define ECA_VX		0x00020000
-#define ECA_PROTEXCI	0x00002000
-#define ECA_APIE	0x00000008
-#define ECA_SII		0x00000001
-	__u32	eca;			/* 0x004c */
-#define ICPT_INST	0x04
-#define ICPT_PROGI	0x08
-#define ICPT_INSTPROGI	0x0C
-#define ICPT_EXTREQ	0x10
-#define ICPT_EXTINT	0x14
-#define ICPT_IOREQ	0x18
-#define ICPT_WAIT	0x1c
-#define ICPT_VALIDITY	0x20
-#define ICPT_STOP	0x28
-#define ICPT_OPEREXC	0x2C
-#define ICPT_PARTEXEC	0x38
-#define ICPT_IOINST	0x40
-#define ICPT_KSS	0x5c
-#define ICPT_MCHKREQ	0x60
-#define ICPT_INT_ENABLE	0x64
-#define ICPT_PV_INSTR	0x68
-#define ICPT_PV_NOTIFY	0x6c
-#define ICPT_PV_PREF	0x70
-	__u8	icptcode;		/* 0x0050 */
-	__u8	icptstatus;		/* 0x0051 */
-	__u16	ihcpu;			/* 0x0052 */
-	__u8	reserved54;		/* 0x0054 */
-#define IICTL_CODE_NONE		 0x00
-#define IICTL_CODE_MCHK		 0x01
-#define IICTL_CODE_EXT		 0x02
-#define IICTL_CODE_IO		 0x03
-#define IICTL_CODE_RESTART	 0x04
-#define IICTL_CODE_SPECIFICATION 0x10
-#define IICTL_CODE_OPERAND	 0x11
-	__u8	iictl;			/* 0x0055 */
-	__u16	ipa;			/* 0x0056 */
-	__u32	ipb;			/* 0x0058 */
-	__u32	scaoh;			/* 0x005c */
-#define FPF_BPBC 	0x20
-	__u8	fpf;			/* 0x0060 */
-#define ECB_GS		0x40
-#define ECB_TE		0x10
-#define ECB_SPECI	0x08
-#define ECB_SRSI	0x04
-#define ECB_HOSTPROTINT	0x02
-	__u8	ecb;			/* 0x0061 */
-#define ECB2_CMMA	0x80
-#define ECB2_IEP	0x20
-#define ECB2_PFMFI	0x08
-#define ECB2_ESCA	0x04
-	__u8    ecb2;                   /* 0x0062 */
-#define ECB3_DEA 0x08
-#define ECB3_AES 0x04
-#define ECB3_RI  0x01
-	__u8    ecb3;			/* 0x0063 */
-	__u32	scaol;			/* 0x0064 */
-	__u8	sdf;			/* 0x0068 */
-	__u8    epdx;			/* 0x0069 */
-	__u8	cpnc;			/* 0x006a */
-	__u8	reserved6b;		/* 0x006b */
-	__u32	todpr;			/* 0x006c */
-#define GISA_FORMAT1 0x00000001
-	__u32	gd;			/* 0x0070 */
-	__u8	reserved74[12];		/* 0x0074 */
-	__u64	mso;			/* 0x0080 */
-	__u64	msl;			/* 0x0088 */
-	psw_t	gpsw;			/* 0x0090 */
-	__u64	gg14;			/* 0x00a0 */
-	__u64	gg15;			/* 0x00a8 */
-	__u8	reservedb0[8];		/* 0x00b0 */
-#define HPID_KVM	0x4
-#define HPID_VSIE	0x5
-	__u8	hpid;			/* 0x00b8 */
-	__u8	reservedb9[7];		/* 0x00b9 */
-	union {
-		struct {
-			__u32	eiparams;	/* 0x00c0 */
-			__u16	extcpuaddr;	/* 0x00c4 */
-			__u16	eic;		/* 0x00c6 */
-		};
-		__u64	mcic;			/* 0x00c0 */
-	} __packed;
-	__u32	reservedc8;		/* 0x00c8 */
-	union {
-		struct {
-			__u16	pgmilc;		/* 0x00cc */
-			__u16	iprcc;		/* 0x00ce */
-		};
-		__u32	edc;			/* 0x00cc */
-	} __packed;
-	union {
-		struct {
-			__u32	dxc;		/* 0x00d0 */
-			__u16	mcn;		/* 0x00d4 */
-			__u8	perc;		/* 0x00d6 */
-			__u8	peratmid;	/* 0x00d7 */
-		};
-		__u64	faddr;			/* 0x00d0 */
-	} __packed;
-	__u64	peraddr;		/* 0x00d8 */
-	__u8	eai;			/* 0x00e0 */
-	__u8	peraid;			/* 0x00e1 */
-	__u8	oai;			/* 0x00e2 */
-	__u8	armid;			/* 0x00e3 */
-	__u8	reservede4[4];		/* 0x00e4 */
-	union {
-		__u64	tecmc;		/* 0x00e8 */
-		struct {
-			__u16	subchannel_id;	/* 0x00e8 */
-			__u16	subchannel_nr;	/* 0x00ea */
-			__u32	io_int_parm;	/* 0x00ec */
-			__u32	io_int_word;	/* 0x00f0 */
-		};
-	} __packed;
-	__u8	reservedf4[8];		/* 0x00f4 */
-#define CRYCB_FORMAT_MASK 0x00000003
-#define CRYCB_FORMAT0 0x00000000
-#define CRYCB_FORMAT1 0x00000001
-#define CRYCB_FORMAT2 0x00000003
-	__u32	crycbd;			/* 0x00fc */
-	__u64	gcr[16];		/* 0x0100 */
-	union {
-		__u64	gbea;		/* 0x0180 */
-		__u64	sidad;
-	};
-	__u8    reserved188[8];		/* 0x0188 */
-	__u64   sdnxo;			/* 0x0190 */
-	__u8    reserved198[8];		/* 0x0198 */
-	__u32	fac;			/* 0x01a0 */
-	__u8	reserved1a4[20];	/* 0x01a4 */
-	__u64	cbrlo;			/* 0x01b8 */
-	__u8	reserved1c0[8];		/* 0x01c0 */
-#define ECD_HOSTREGMGMT	0x20000000
-#define ECD_MEF		0x08000000
-#define ECD_ETOKENF	0x02000000
-#define ECD_ECC		0x00200000
-	__u32	ecd;			/* 0x01c8 */
-	__u8	reserved1cc[18];	/* 0x01cc */
-	__u64	pp;			/* 0x01de */
-	__u8	reserved1e6[2];		/* 0x01e6 */
-	__u64	itdba;			/* 0x01e8 */
-	__u64   riccbd;			/* 0x01f0 */
-	__u64	gvrd;			/* 0x01f8 */
-} __packed __aligned(512);
-
-struct kvm_s390_itdb {
-	__u8	data[256];
-};
-
-struct sie_page {
-	struct kvm_s390_sie_block sie_block;
-	struct mcck_volatile_info mcck_info;	/* 0x0200 */
-	__u8 reserved218[360];		/* 0x0218 */
-	__u64 pv_grregs[16];		/* 0x0380 */
-	__u8 reserved400[512];		/* 0x0400 */
-	struct kvm_s390_itdb itdb;	/* 0x0600 */
-	__u8 reserved700[2304];		/* 0x0700 */
-};
-
 struct kvm_vcpu_stat {
 	struct kvm_vcpu_stat_generic generic;
 	u64 exit_userspace;
@@ -410,6 +97,7 @@ struct kvm_vcpu_stat {
 	u64 instruction_io_other;
 	u64 instruction_lpsw;
 	u64 instruction_lpswe;
+	u64 instruction_lpswey;
 	u64 instruction_pfmf;
 	u64 instruction_ptff;
 	u64 instruction_sck;
@@ -510,6 +198,9 @@ struct kvm_vcpu_stat {
 #define PGM_REGION_FIRST_TRANS		0x39
 #define PGM_REGION_SECOND_TRANS		0x3a
 #define PGM_REGION_THIRD_TRANS		0x3b
+#define PGM_SECURE_STORAGE_ACCESS	0x3d
+#define PGM_NON_SECURE_STORAGE_ACCESS	0x3e
+#define PGM_SECURE_STORAGE_VIOLATION	0x3f
 #define PGM_MONITOR			0x40
 #define PGM_PER				0x80
 #define PGM_CRYPTO_OPERATION		0x119
@@ -726,13 +417,10 @@ struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *vsie_block;
 	unsigned int      host_acrs[NUM_ACRS];
 	struct gs_cb      *host_gscb;
-	struct fpu	  host_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
 	struct gmap *gmap;
-	/* backup location for the currently enabled gmap when scheduled out */
-	struct gmap *enabled_gmap;
 	struct kvm_guestdbg_info_arch guestdbg;
 	unsigned long pfault_token;
 	unsigned long pfault_select;
@@ -748,6 +436,8 @@ struct kvm_vcpu_arch {
 	__u64 cputm_start;
 	bool gs_enabled;
 	bool skey_enabled;
+	/* Indicator if the access registers have been loaded from guest */
+	bool acrs_loaded;
 	struct kvm_s390_pv_vcpu pv;
 	union diag318_info diag318_info;
 };
@@ -759,6 +449,14 @@ struct kvm_vm_stat {
 	u64 inject_pfault_done;
 	u64 inject_service_signal;
 	u64 inject_virtio;
+	u64 aen_forward;
+	u64 gmap_shadow_create;
+	u64 gmap_shadow_reuse;
+	u64 gmap_shadow_r1_entry;
+	u64 gmap_shadow_r2_entry;
+	u64 gmap_shadow_r3_entry;
+	u64 gmap_shadow_sg_entry;
+	u64 gmap_shadow_pg_entry;
 };
 
 struct kvm_arch_memory_slot {
@@ -793,12 +491,14 @@ struct s390_io_adapter {
 
 struct kvm_s390_cpu_model {
 	/* facility mask supported by kvm & hosting machine */
-	__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+	__u64 fac_mask[S390_ARCH_FAC_MASK_SIZE_U64];
 	struct kvm_s390_vm_cpu_subfunc subfuncs;
 	/* facility list requested by guest (in dma page) */
 	__u64 *fac_list;
 	u64 cpuid;
 	unsigned short ibc;
+	/* subset of available UV-features for pv-guests enabled by user space */
+	struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
 };
 
 typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
@@ -896,12 +596,14 @@ struct sie_page2 {
 	u8 reserved928[0x1000 - 0x928];			/* 0x0928 */
 };
 
+struct vsie_page;
+
 struct kvm_s390_vsie {
 	struct mutex mutex;
 	struct radix_tree_root addr_to_page;
 	int page_count;
 	int next;
-	struct page *pages[KVM_MAX_VCPUS];
+	struct vsie_page *pages[KVM_MAX_VCPUS];
 };
 
 struct kvm_s390_gisa_iam {
@@ -923,6 +625,10 @@ struct kvm_s390_pv {
 	u64 guest_len;
 	unsigned long stor_base;
 	void *stor_var;
+	bool dumping;
+	void *set_aside;
+	struct list_head need_cleanup;
+	struct mmu_notifier mmu_notifier;
 };
 
 struct kvm_arch{
@@ -939,6 +645,7 @@ struct kvm_arch{
 	int use_cmma;
 	int use_pfmfi;
 	int use_skf;
+	int use_zpci_interp;
 	int user_cpu_state_ctrl;
 	int user_sigp;
 	int user_stsi;
@@ -962,6 +669,8 @@ struct kvm_arch{
 	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
 	struct kvm_s390_gisa_interrupt gisa_int;
 	struct kvm_s390_pv pv;
+	struct list_head kzdev_list;
+	spinlock_t kzdev_list_lock;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
@@ -994,15 +703,22 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
 			       unsigned long *aqm, unsigned long *adm);
 
-extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa,
+	     unsigned long gasce);
+
+static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce)
+{
+	return __sie64a(virt_to_phys(sie_block), sie_block, rsa, gasce);
+}
+
 extern char sie_exit;
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm);
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
-static inline void kvm_arch_hardware_disable(void) {}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
 					 struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
@@ -1012,4 +728,14 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
+#define __KVM_HAVE_ARCH_VM_FREE
+void kvm_arch_free_vm(struct kvm *kvm);
+
+struct zpci_kvm_hook {
+	int (*kvm_register)(void *opaque, struct kvm *kvm);
+	void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
+
 #endif
diff --git a/arch/s390/include/asm/kvm_host_types.h b/arch/s390/include/asm/kvm_host_types.h
new file mode 100644
index 000000000000..1394d3fb648f
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host_types.h
@@ -0,0 +1,348 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_KVM_HOST_TYPES_H
+#define _ASM_KVM_HOST_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/types.h>
+
+#define KVM_S390_BSCA_CPU_SLOTS 64
+#define KVM_S390_ESCA_CPU_SLOTS 248
+
+#define SIGP_CTRL_C		0x80
+#define SIGP_CTRL_SCN_MASK	0x3f
+
+union bsca_sigp_ctrl {
+	__u8 value;
+	struct {
+		__u8 c : 1;
+		__u8 r : 1;
+		__u8 scn : 6;
+	};
+};
+
+union esca_sigp_ctrl {
+	__u16 value;
+	struct {
+		__u8 c : 1;
+		__u8 reserved: 7;
+		__u8 scn;
+	};
+};
+
+struct esca_entry {
+	union esca_sigp_ctrl sigp_ctrl;
+	__u16	reserved1[3];
+	__u64	sda;
+	__u64	reserved2[6];
+};
+
+struct bsca_entry {
+	__u8	reserved0;
+	union bsca_sigp_ctrl	sigp_ctrl;
+	__u16	reserved[3];
+	__u64	sda;
+	__u64	reserved2[2];
+};
+
+union ipte_control {
+	unsigned long val;
+	struct {
+		unsigned long k  : 1;
+		unsigned long kh : 31;
+		unsigned long kg : 32;
+	};
+};
+
+/*
+ * Utility is defined as two bytes but having it four bytes wide
+ * generates more efficient code. Since the following bytes are
+ * reserved this makes no functional difference.
+ */
+union sca_utility {
+	__u32 val;
+	struct {
+		__u32 mtcr : 1;
+		__u32	   : 31;
+	};
+};
+
+struct bsca_block {
+	union ipte_control ipte_control;
+	__u64	reserved[5];
+	__u64	mcn;
+	union sca_utility utility;
+	__u8	reserved2[4];
+	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
+};
+
+struct esca_block {
+	union ipte_control ipte_control;
+	__u64	reserved1[6];
+	union sca_utility utility;
+	__u8	reserved2[4];
+	__u64	mcn[4];
+	__u64	reserved3[20];
+	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
+};
+
+/*
+ * This struct is used to store some machine check info from lowcore
+ * for machine checks that happen while the guest is running.
+ * The copy in the host's lowcore might be overwritten by a second machine
+ * check on the host while the first is still in its high-level handling.
+ * The size is 24 bytes.
+ */
+struct mcck_volatile_info {
+	__u64 mcic;
+	__u64 failing_storage_address;
+	__u32 ext_damage_code;
+	__u32 reserved;
+};
+
+#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
+			  CR0_MEASUREMENT_ALERT_SUBMASK)
+#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
+			   CR14_EXTERNAL_DAMAGE_SUBMASK)
+
+#define SIDAD_SIZE_MASK		0xff
+#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+	((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
+#define CPUSTAT_STOPPED    0x80000000
+#define CPUSTAT_WAIT	   0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT   0x04000000
+#define CPUSTAT_IO_INT	   0x02000000
+#define CPUSTAT_EXT_INT    0x01000000
+#define CPUSTAT_RUNNING    0x00800000
+#define CPUSTAT_RETAINED   0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB    0x00010000
+#define CPUSTAT_RRF	   0x00008000
+#define CPUSTAT_SLSV	   0x00004000
+#define CPUSTAT_SLSR	   0x00002000
+#define CPUSTAT_ZARCH	   0x00000800
+#define CPUSTAT_MCDS	   0x00000100
+#define CPUSTAT_KSS	   0x00000200
+#define CPUSTAT_SM	   0x00000080
+#define CPUSTAT_IBS	   0x00000040
+#define CPUSTAT_GED2	   0x00000010
+#define CPUSTAT_G	   0x00000008
+#define CPUSTAT_GED	   0x00000004
+#define CPUSTAT_J	   0x00000002
+#define CPUSTAT_P	   0x00000001
+
+struct kvm_s390_sie_block {
+	atomic_t cpuflags;		/* 0x0000 */
+	__u32 : 1;			/* 0x0004 */
+	__u32 prefix : 18;
+	__u32 : 1;
+	__u32 ibc : 12;
+	__u8	reserved08[4];		/* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+	__u32	prog0c;			/* 0x000c */
+	union {
+		__u8	reserved10[16];		/* 0x0010 */
+		struct {
+			__u64	pv_handle_cpu;
+			__u64	pv_handle_config;
+		};
+	};
+#define PROG_BLOCK_SIE	(1<<0)
+#define PROG_REQUEST	(1<<1)
+	atomic_t prog20;		/* 0x0020 */
+	__u8	reserved24[4];		/* 0x0024 */
+	__u64	cputm;			/* 0x0028 */
+	__u64	ckc;			/* 0x0030 */
+	__u64	epoch;			/* 0x0038 */
+	__u32	svcc;			/* 0x0040 */
+#define LCTL_CR0	0x8000
+#define LCTL_CR6	0x0200
+#define LCTL_CR9	0x0040
+#define LCTL_CR10	0x0020
+#define LCTL_CR11	0x0010
+#define LCTL_CR14	0x0002
+	__u16	lctl;			/* 0x0044 */
+	__s16	icpua;			/* 0x0046 */
+#define ICTL_OPEREXC	0x80000000
+#define ICTL_PINT	0x20000000
+#define ICTL_LPSW	0x00400000
+#define ICTL_STCTL	0x00040000
+#define ICTL_ISKE	0x00004000
+#define ICTL_SSKE	0x00002000
+#define ICTL_RRBE	0x00001000
+#define ICTL_TPROT	0x00000200
+	__u32	ictl;			/* 0x0048 */
+#define ECA_CEI		0x80000000
+#define ECA_IB		0x40000000
+#define ECA_SIGPI	0x10000000
+#define ECA_MVPGI	0x01000000
+#define ECA_AIV		0x00200000
+#define ECA_VX		0x00020000
+#define ECA_PROTEXCI	0x00002000
+#define ECA_APIE	0x00000008
+#define ECA_SII		0x00000001
+	__u32	eca;			/* 0x004c */
+#define ICPT_INST	0x04
+#define ICPT_PROGI	0x08
+#define ICPT_INSTPROGI	0x0C
+#define ICPT_EXTREQ	0x10
+#define ICPT_EXTINT	0x14
+#define ICPT_IOREQ	0x18
+#define ICPT_WAIT	0x1c
+#define ICPT_VALIDITY	0x20
+#define ICPT_STOP	0x28
+#define ICPT_OPEREXC	0x2C
+#define ICPT_PARTEXEC	0x38
+#define ICPT_IOINST	0x40
+#define ICPT_KSS	0x5c
+#define ICPT_MCHKREQ	0x60
+#define ICPT_INT_ENABLE	0x64
+#define ICPT_PV_INSTR	0x68
+#define ICPT_PV_NOTIFY	0x6c
+#define ICPT_PV_PREF	0x70
+	__u8	icptcode;		/* 0x0050 */
+	__u8	icptstatus;		/* 0x0051 */
+	__u16	ihcpu;			/* 0x0052 */
+	__u8	reserved54;		/* 0x0054 */
+#define IICTL_CODE_NONE		 0x00
+#define IICTL_CODE_MCHK		 0x01
+#define IICTL_CODE_EXT		 0x02
+#define IICTL_CODE_IO		 0x03
+#define IICTL_CODE_RESTART	 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND	 0x11
+	__u8	iictl;			/* 0x0055 */
+	__u16	ipa;			/* 0x0056 */
+	__u32	ipb;			/* 0x0058 */
+	__u32	scaoh;			/* 0x005c */
+#define FPF_BPBC	0x20
+	__u8	fpf;			/* 0x0060 */
+#define ECB_GS		0x40
+#define ECB_TE		0x10
+#define ECB_SPECI	0x08
+#define ECB_SRSI	0x04
+#define ECB_HOSTPROTINT	0x02
+#define ECB_PTF		0x01
+	__u8	ecb;			/* 0x0061 */
+#define ECB2_CMMA	0x80
+#define ECB2_IEP	0x20
+#define ECB2_PFMFI	0x08
+#define ECB2_ESCA	0x04
+#define ECB2_ZPCI_LSI	0x02
+	__u8	ecb2;			/* 0x0062 */
+#define ECB3_AISI	0x20
+#define ECB3_AISII	0x10
+#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI  0x01
+	__u8	ecb3;			/* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
+	__u32	scaol;			/* 0x0064 */
+	__u8	sdf;			/* 0x0068 */
+	__u8	epdx;			/* 0x0069 */
+	__u8	cpnc;			/* 0x006a */
+	__u8	reserved6b;		/* 0x006b */
+	__u32	todpr;			/* 0x006c */
+#define GISA_FORMAT1 0x00000001
+	__u32	gd;			/* 0x0070 */
+	__u8	reserved74[12];		/* 0x0074 */
+	__u64	mso;			/* 0x0080 */
+	__u64	msl;			/* 0x0088 */
+	psw_t	gpsw;			/* 0x0090 */
+	__u64	gg14;			/* 0x00a0 */
+	__u64	gg15;			/* 0x00a8 */
+	__u8	reservedb0[8];		/* 0x00b0 */
+#define HPID_KVM	0x4
+#define HPID_VSIE	0x5
+	__u8	hpid;			/* 0x00b8 */
+	__u8	reservedb9[7];		/* 0x00b9 */
+	union {
+		struct {
+			__u32	eiparams;	/* 0x00c0 */
+			__u16	extcpuaddr;	/* 0x00c4 */
+			__u16	eic;		/* 0x00c6 */
+		};
+		__u64	mcic;			/* 0x00c0 */
+	} __packed;
+	__u32	reservedc8;		/* 0x00c8 */
+	union {
+		struct {
+			__u16	pgmilc;		/* 0x00cc */
+			__u16	iprcc;		/* 0x00ce */
+		};
+		__u32	edc;			/* 0x00cc */
+	} __packed;
+	union {
+		struct {
+			__u32	dxc;		/* 0x00d0 */
+			__u16	mcn;		/* 0x00d4 */
+			__u8	perc;		/* 0x00d6 */
+			__u8	peratmid;	/* 0x00d7 */
+		};
+		__u64	faddr;			/* 0x00d0 */
+	} __packed;
+	__u64	peraddr;		/* 0x00d8 */
+	__u8	eai;			/* 0x00e0 */
+	__u8	peraid;			/* 0x00e1 */
+	__u8	oai;			/* 0x00e2 */
+	__u8	armid;			/* 0x00e3 */
+	__u8	reservede4[4];		/* 0x00e4 */
+	union {
+		__u64	tecmc;		/* 0x00e8 */
+		struct {
+			__u16	subchannel_id;	/* 0x00e8 */
+			__u16	subchannel_nr;	/* 0x00ea */
+			__u32	io_int_parm;	/* 0x00ec */
+			__u32	io_int_word;	/* 0x00f0 */
+		};
+	} __packed;
+	__u8	reservedf4[8];		/* 0x00f4 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+	__u32	crycbd;			/* 0x00fc */
+	__u64	gcr[16];		/* 0x0100 */
+	union {
+		__u64	gbea;		/* 0x0180 */
+		__u64	sidad;
+	};
+	__u8	reserved188[8];		/* 0x0188 */
+	__u64	sdnxo;			/* 0x0190 */
+	__u8	reserved198[8];		/* 0x0198 */
+	__u32	fac;			/* 0x01a0 */
+	__u8	reserved1a4[20];	/* 0x01a4 */
+	__u64	cbrlo;			/* 0x01b8 */
+	__u8	reserved1c0[8];		/* 0x01c0 */
+#define ECD_HOSTREGMGMT	0x20000000
+#define ECD_MEF		0x08000000
+#define ECD_ETOKENF	0x02000000
+#define ECD_ECC		0x00200000
+#define ECD_HMAC	0x00004000
+	__u32	ecd;			/* 0x01c8 */
+	__u8	reserved1cc[18];	/* 0x01cc */
+	__u64	pp;			/* 0x01de */
+	__u8	reserved1e6[2];		/* 0x01e6 */
+	__u64	itdba;			/* 0x01e8 */
+	__u64	riccbd;			/* 0x01f0 */
+	__u64	gvrd;			/* 0x01f8 */
+} __packed __aligned(512);
+
+struct kvm_s390_itdb {
+	__u8	data[256];
+};
+
+struct sie_page {
+	struct kvm_s390_sie_block sie_block;
+	struct mcck_volatile_info mcck_info;	/* 0x0200 */
+	__u8 reserved218[360];		/* 0x0218 */
+	__u64 pv_grregs[16];		/* 0x0380 */
+	__u8 reserved400[512];		/* 0x0400 */
+	struct kvm_s390_itdb itdb;	/* 0x0600 */
+	__u8 reserved700[2304];		/* 0x0700 */
+};
+
+#endif /* _ASM_KVM_HOST_TYPES_H */
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index c76777b15fec..df3fb7d8227b 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -4,7 +4,7 @@
 
 #include <linux/stringify.h>
 
-#define __ALIGN .align 16, 0x07
+#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
 #endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 26fe5e535728..e99e9c87b1ce 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -10,13 +10,20 @@
 #define _ASM_S390_LOWCORE_H
 
 #include <linux/types.h>
+#include <asm/machine.h>
 #include <asm/ptrace.h>
+#include <asm/ctlreg.h>
 #include <asm/cpu.h>
 #include <asm/types.h>
+#include <asm/alternative.h>
 
 #define LC_ORDER 1
 #define LC_PAGES 2
 
+#define LOWCORE_ALT_ADDRESS	_AC(0x70000, UL)
+
+#ifndef __ASSEMBLY__
+
 struct pgm_tdb {
 	u64 data[32];
 };
@@ -92,12 +99,11 @@ struct lowcore {
 	psw_t	io_new_psw;			/* 0x01f0 */
 
 	/* Save areas. */
-	__u64	save_area_sync[8];		/* 0x0200 */
-	__u64	save_area_async[8];		/* 0x0240 */
+	__u64	save_area[8];			/* 0x0200 */
+	__u8	pad_0x0240[0x0280-0x0240];	/* 0x0240 */
 	__u64	save_area_restart[1];		/* 0x0280 */
 
-	/* CPU flags. */
-	__u64	cpu_flags;			/* 0x0288 */
+	__u64	pcpu;				/* 0x0288 */
 
 	/* Return psws. */
 	psw_t	return_psw;			/* 0x0290 */
@@ -118,10 +124,10 @@ struct lowcore {
 	__u64	avg_steal_timer;		/* 0x0300 */
 	__u64	last_update_timer;		/* 0x0308 */
 	__u64	last_update_clock;		/* 0x0310 */
-	__u64	int_clock;			/* 0x0318*/
-	__u64	mcck_clock;			/* 0x0320 */
+	__u64	int_clock;			/* 0x0318 */
+	__u8	pad_0x0320[0x0328-0x0320];	/* 0x0320 */
 	__u64	clock_comparator;		/* 0x0328 */
-	__u64	boot_clock[2];			/* 0x0330 */
+	__u8	pad_0x0330[0x0340-0x0330];	/* 0x0330 */
 
 	/* Current process. */
 	__u64	current_task;			/* 0x0340 */
@@ -139,8 +145,8 @@ struct lowcore {
 	__u32	restart_flags;			/* 0x0384 */
 
 	/* Address space pointer. */
-	__u64	kernel_asce;			/* 0x0388 */
-	__u64	user_asce;			/* 0x0390 */
+	struct ctlreg kernel_asce;		/* 0x0388 */
+	struct ctlreg user_asce;		/* 0x0390 */
 
 	/*
 	 * The lpp and current_pid fields form a
@@ -156,12 +162,9 @@ struct lowcore {
 	__s32	preempt_count;			/* 0x03a8 */
 	__u32	spinlock_lockval;		/* 0x03ac */
 	__u32	spinlock_index;			/* 0x03b0 */
-	__u32	fpu_flags;			/* 0x03b4 */
+	__u8	pad_0x03b4[0x03b8-0x03b4];	/* 0x03b4 */
 	__u64	percpu_offset;			/* 0x03b8 */
-	__u8	pad_0x03c0[0x03c8-0x03c0];	/* 0x03c0 */
-	__u64	machine_flags;			/* 0x03c8 */
-	__u64	gmap;				/* 0x03d0 */
-	__u8	pad_0x03d8[0x0400-0x03d8];	/* 0x03d8 */
+	__u8	pad_0x03c0[0x0400-0x03c0];	/* 0x03c0 */
 
 	__u32	return_lpswe;			/* 0x0400 */
 	__u32	return_mcck_lpswe;		/* 0x0404 */
@@ -199,18 +202,33 @@ struct lowcore {
 	__u32	clock_comp_save_area[2];	/* 0x1330 */
 	__u64	last_break_save_area;		/* 0x1338 */
 	__u32	access_regs_save_area[16];	/* 0x1340 */
-	__u64	cregs_save_area[16];		/* 0x1380 */
+	struct ctlreg cregs_save_area[16];	/* 0x1380 */
 	__u8	pad_0x1400[0x1500-0x1400];	/* 0x1400 */
 	/* Cryptography-counter designation */
 	__u64	ccd;				/* 0x1500 */
-	__u8	pad_0x1508[0x1800-0x1508];	/* 0x1508 */
+	/* AI-extension counter designation */
+	__u64	aicd;				/* 0x1508 */
+	__u8	pad_0x1510[0x1800-0x1510];	/* 0x1510 */
 
 	/* Transaction abort diagnostic block */
 	struct pgm_tdb pgm_tdb;			/* 0x1800 */
 	__u8	pad_0x1900[0x2000-0x1900];	/* 0x1900 */
 } __packed __aligned(8192);
 
-#define S390_lowcore (*((struct lowcore *) 0))
+static __always_inline struct lowcore *get_lowcore(void)
+{
+	struct lowcore *lc;
+
+	if (__is_defined(__DECOMPRESSOR))
+		return NULL;	/* decompressor build: always address 0, no alternative */
+	asm_inline(
+		ALTERNATIVE("	lghi	%[lc],0",	/* default: lc = 0 */
+			    "	llilh	%[lc],%[alt]",	/* replacement: lc = alt << 16 */
+			    ALT_FEATURE(MFEATURE_LOWCORE))	/* selects the replacement */
+		: [lc] "=d" (lc)
+		: [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));	/* upper halfword of 0x70000 */
+	return lc;
+}
 
 extern struct lowcore *lowcore_ptr[];
 
@@ -219,12 +237,19 @@ static inline void set_prefix(__u32 address)
 	asm volatile("spx %0" : : "Q" (address) : "memory");
 }
 
-static inline __u32 store_prefix(void)
-{
-	__u32 address;
+#else /* __ASSEMBLY__ */
 
-	asm volatile("stpx %0" : "=Q" (address));
-	return address;
-}
+.macro GET_LC reg
+	ALTERNATIVE "lghi	\reg,0",					\
+		__stringify(llilh	\reg, LOWCORE_ALT_ADDRESS >> 16),	\
+		ALT_FEATURE(MFEATURE_LOWCORE)
+.endm
+
+.macro STMG_LC start, end, savearea
+	ALTERNATIVE "stmg	\start, \end, \savearea",				\
+		__stringify(stmg	\start, \end, LOWCORE_ALT_ADDRESS + \savearea),	\
+		ALT_FEATURE(MFEATURE_LOWCORE)
+.endm
 
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
new file mode 100644
index 000000000000..50225940d971
--- /dev/null
+++ b/arch/s390/include/asm/maccess.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_MACCESS_H
+#define __ASM_S390_MACCESS_H
+
+#include <linux/types.h>
+
+#define MEMCPY_REAL_SIZE	PAGE_SIZE
+#define MEMCPY_REAL_MASK	PAGE_MASK
+
+struct iov_iter;
+
+extern unsigned long __memcpy_real_area;
+extern pte_t *memcpy_real_ptep;
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
+int memcpy_real(void *dest, unsigned long src, size_t count);
+#ifdef CONFIG_CRASH_DUMP
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+#endif
+
+#endif /* __ASM_S390_MACCESS_H */
diff --git a/arch/s390/include/asm/machine.h b/arch/s390/include/asm/machine.h
new file mode 100644
index 000000000000..8abe5afdbfc4
--- /dev/null
+++ b/arch/s390/include/asm/machine.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef __ASM_S390_MACHINE_H
+#define __ASM_S390_MACHINE_H
+
+#include <linux/const.h>
+
+#define MFEATURE_LOWCORE	0
+#define MFEATURE_PCI_MIO	1
+#define MFEATURE_SCC		2
+#define MFEATURE_TLB_GUEST	3
+#define MFEATURE_TX		4
+#define MFEATURE_ESOP		5
+#define MFEATURE_DIAG9C		6
+#define MFEATURE_VM		7
+#define MFEATURE_KVM		8
+#define MFEATURE_LPAR		9
+#define MFEATURE_DIAG288	10
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bitops.h>
+#include <asm/alternative.h>
+
+extern unsigned long machine_features[1];
+
+#define MAX_MFEATURE_BIT (sizeof(machine_features) * BITS_PER_BYTE)
+
+static inline void __set_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+	if (nr >= MAX_MFEATURE_BIT)
+		return;
+	__set_bit(nr, mfeatures);
+}
+
+static inline void set_machine_feature(unsigned int nr)
+{
+	__set_machine_feature(nr, machine_features);
+}
+
+static inline void __clear_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+	if (nr >= MAX_MFEATURE_BIT)
+		return;
+	__clear_bit(nr, mfeatures);
+}
+
+static inline void clear_machine_feature(unsigned int nr)
+{
+	__clear_machine_feature(nr, machine_features);
+}
+
+static inline bool __test_machine_feature(unsigned int nr, unsigned long *mfeatures)
+{
+	if (nr >= MAX_MFEATURE_BIT)	/* out-of-range feature numbers report false */
+		return false;
+	return test_bit(nr, mfeatures);
+}
+
+static inline bool test_machine_feature(unsigned int nr)
+{
+	return __test_machine_feature(nr, machine_features);	/* global feature bitmap */
+}
+
+static __always_inline bool __test_machine_feature_constant(unsigned int nr)
+{
+	asm goto(
+		ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FEATURE(%[nr]))
+		:
+		: [nr] "i" (nr)
+		:
+		: l_no);
+	return true;
+l_no:
+	return false;
+}
+
+#define DEFINE_MACHINE_HAS_FEATURE(name, feature)				\
+static __always_inline bool machine_has_##name(void)				\
+{										\
+	if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(feature))	\
+		return __test_machine_feature_constant(feature);		\
+	return test_machine_feature(feature);					\
+}
+
+DEFINE_MACHINE_HAS_FEATURE(relocated_lowcore, MFEATURE_LOWCORE)
+DEFINE_MACHINE_HAS_FEATURE(scc, MFEATURE_SCC)
+DEFINE_MACHINE_HAS_FEATURE(tlb_guest, MFEATURE_TLB_GUEST)
+DEFINE_MACHINE_HAS_FEATURE(tx, MFEATURE_TX)
+DEFINE_MACHINE_HAS_FEATURE(esop, MFEATURE_ESOP)
+DEFINE_MACHINE_HAS_FEATURE(diag9c, MFEATURE_DIAG9C)
+DEFINE_MACHINE_HAS_FEATURE(vm, MFEATURE_VM)
+DEFINE_MACHINE_HAS_FEATURE(kvm, MFEATURE_KVM)
+DEFINE_MACHINE_HAS_FEATURE(lpar, MFEATURE_LPAR)
+
+#define machine_is_vm	machine_has_vm
+#define machine_is_kvm	machine_has_kvm
+#define machine_is_lpar	machine_has_lpar
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_S390_MACHINE_H */
diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h
new file mode 100644
index 000000000000..11a71bd14954
--- /dev/null
+++ b/arch/s390/include/asm/march.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_S390_MARCH_H
+#define __ASM_S390_MARCH_H
+
+#include <linux/kconfig.h>
+
+#define MARCH_HAS_Z10_FEATURES 1
+
+#ifndef __DECOMPRESSOR
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#define MARCH_HAS_Z196_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+#define MARCH_HAS_ZEC12_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
+#define MARCH_HAS_Z13_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
+#define MARCH_HAS_Z14_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z15_FEATURES
+#define MARCH_HAS_Z15_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z16_FEATURES
+#define MARCH_HAS_Z16_FEATURES 1
+#endif
+
+#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES
+#define MARCH_HAS_Z17_FEATURES 1
+#endif
+
+#endif /* __DECOMPRESSOR */
+
+#endif /* __ASM_S390_MARCH_H */
diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h
deleted file mode 100644
index a7c922a69050..000000000000
--- a/arch/s390/include/asm/mem_detect.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_MEM_DETECT_H
-#define _ASM_S390_MEM_DETECT_H
-
-#include <linux/types.h>
-
-enum mem_info_source {
-	MEM_DETECT_NONE = 0,
-	MEM_DETECT_SCLP_STOR_INFO,
-	MEM_DETECT_DIAG260,
-	MEM_DETECT_SCLP_READ_INFO,
-	MEM_DETECT_BIN_SEARCH
-};
-
-struct mem_detect_block {
-	u64 start;
-	u64 end;
-};
-
-/*
- * Storage element id is defined as 1 byte (up to 256 storage elements).
- * In practise only storage element id 0 and 1 are used).
- * According to architecture one storage element could have as much as
- * 1020 subincrements. 255 mem_detect_blocks are embedded in mem_detect_info.
- * If more mem_detect_blocks are required, a block of memory from already
- * known mem_detect_block is taken (entries_extended points to it).
- */
-#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */
-
-struct mem_detect_info {
-	u32 count;
-	u8 info_source;
-	struct mem_detect_block entries[MEM_INLINED_ENTRIES];
-	struct mem_detect_block *entries_extended;
-};
-extern struct mem_detect_info mem_detect;
-
-void add_mem_detect_block(u64 start, u64 end);
-
-static inline int __get_mem_detect_block(u32 n, unsigned long *start,
-					 unsigned long *end)
-{
-	if (n >= mem_detect.count) {
-		*start = 0;
-		*end = 0;
-		return -1;
-	}
-
-	if (n < MEM_INLINED_ENTRIES) {
-		*start = (unsigned long)mem_detect.entries[n].start;
-		*end = (unsigned long)mem_detect.entries[n].end;
-	} else {
-		*start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start;
-		*end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end;
-	}
-	return 0;
-}
-
-/**
- * for_each_mem_detect_block - early online memory range iterator
- * @i: an integer used as loop variable
- * @p_start: ptr to unsigned long for start address of the range
- * @p_end: ptr to unsigned long for end address of the range
- *
- * Walks over detected online memory ranges.
- */
-#define for_each_mem_detect_block(i, p_start, p_end)			\
-	for (i = 0, __get_mem_detect_block(i, p_start, p_end);		\
-	     i < mem_detect.count;					\
-	     i++, __get_mem_detect_block(i, p_start, p_end))
-
-static inline void get_mem_detect_reserved(unsigned long *start,
-					   unsigned long *size)
-{
-	*start = (unsigned long)mem_detect.entries_extended;
-	if (mem_detect.count > MEM_INLINED_ENTRIES)
-		*size = (mem_detect.count - MEM_INLINED_ENTRIES) * sizeof(struct mem_detect_block);
-	else
-		*size = 0;
-}
-
-static inline unsigned long get_mem_detect_end(void)
-{
-	unsigned long start;
-	unsigned long end;
-
-	if (mem_detect.count) {
-		__get_mem_detect_block(mem_detect.count - 1, &start, &end);
-		return end;
-	}
-	return 0;
-}
-
-#endif
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index 08a8b96606d7..b85e13505a0f 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -4,8 +4,8 @@
 
 #ifndef __ASSEMBLY__
 
-int set_memory_encrypted(unsigned long addr, int numpages);
-int set_memory_decrypted(unsigned long addr, int numpages);
+int set_memory_encrypted(unsigned long vaddr, int numpages);
+int set_memory_decrypted(unsigned long vaddr, int numpages);
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 82aae78e1315..f07e49b419ab 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -11,49 +11,35 @@ typedef struct {
 	cpumask_t cpu_attach_mask;
 	atomic_t flush_count;
 	unsigned int flush_mm;
-	struct list_head pgtable_list;
 	struct list_head gmap_list;
 	unsigned long gmap_asce;
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
 	/* The mmu context belongs to a secure guest. */
-	atomic_t is_protected;
+	atomic_t protected_count;
 	/*
 	 * The following bitfields need a down_write on the mm
 	 * semaphore when they are written to. As they are only
 	 * written once, they can be read without a lock.
-	 *
-	 * The mmu context allocates 4K page tables.
 	 */
-	unsigned int alloc_pgste:1;
 	/* The mmu context uses extended page tables. */
 	unsigned int has_pgste:1;
 	/* The mmu context uses storage keys. */
 	unsigned int uses_skeys:1;
 	/* The mmu context uses CMM. */
 	unsigned int uses_cmm:1;
+	/*
+	 * The mmu context allows COW-sharing of memory pages (KSM, zeropage).
+	 * Note that COW-sharing during fork() is currently always allowed.
+	 */
+	unsigned int allow_cow_sharing:1;
 	/* The gmaps associated with this context are allowed to use huge pages. */
 	unsigned int allow_gmap_hpage_1m:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
 	.context.lock =	__SPIN_LOCK_UNLOCKED(name.context.lock),	   \
-	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
 	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
 
-static inline int tprot(unsigned long addr)
-{
-	int rc = -EFAULT;
-
-	asm volatile(
-		"	tprot	0(%1),0\n"
-		"0:	ipm	%0\n"
-		"	srl	%0,28\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (rc) : "a" (addr) : "cc");
-	return rc;
-}
-
 #endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c7937f369e62..d9b8501bc93d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -12,7 +12,8 @@
 #include <linux/uaccess.h>
 #include <linux/mm_types.h>
 #include <asm/tlbflush.h>
-#include <asm/ctl_reg.h>
+#include <asm/ctlreg.h>
+#include <asm/asce.h>
 #include <asm-generic/mm_hooks.h>
 
 #define init_new_context init_new_context
@@ -22,20 +23,17 @@ static inline int init_new_context(struct task_struct *tsk,
 	unsigned long asce_type, init_entry;
 
 	spin_lock_init(&mm->context.lock);
-	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
-	atomic_set(&mm->context.is_protected, 0);
+	atomic_set(&mm->context.protected_count, 0);
 	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
 #ifdef CONFIG_PGSTE
-	mm->context.alloc_pgste = page_table_allocate_pgste ||
-		test_thread_flag(TIF_PGSTE) ||
-		(current->mm && current->mm->context.alloc_pgste);
 	mm->context.has_pgste = 0;
 	mm->context.uses_skeys = 0;
 	mm->context.uses_cmm = 0;
+	mm->context.allow_cow_sharing = 1;
 	mm->context.allow_gmap_hpage_1m = 0;
 #endif
 	switch (mm->context.asce_limit) {
@@ -76,12 +74,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
 	int cpu = smp_processor_id();
 
 	if (next == &init_mm)
-		S390_lowcore.user_asce = s390_invalid_asce;
+		get_lowcore()->user_asce = s390_invalid_asce;
 	else
-		S390_lowcore.user_asce = next->context.asce;
+		get_lowcore()->user_asce.val = next->context.asce;
 	cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-	/* Clear previous user-ASCE from CR7 */
-	__ctl_load(s390_invalid_asce, 7, 7);
+	/* Clear previous user-ASCE from CR1 and CR7 */
+	local_ctl_load(1, &s390_invalid_asce);
+	local_ctl_load(7, &s390_invalid_asce);
 	if (prev != next)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
@@ -102,6 +101,7 @@ static inline void finish_arch_post_lock_switch(void)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	unsigned long flags;
 
 	if (mm) {
 		preempt_disable();
@@ -111,16 +111,26 @@ static inline void finish_arch_post_lock_switch(void)
 		__tlb_flush_mm_lazy(mm);
 		preempt_enable();
 	}
-	__ctl_load(S390_lowcore.user_asce, 7, 7);
+	local_irq_save(flags);
+	if (test_thread_flag(TIF_ASCE_PRIMARY))
+		local_ctl_load(1, &get_lowcore()->kernel_asce);
+	else
+		local_ctl_load(1, &get_lowcore()->user_asce);
+	local_ctl_load(7, &get_lowcore()->user_asce);
+	local_irq_restore(flags);
 }
 
 #define activate_mm activate_mm
 static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
-	switch_mm(prev, next, current);
+	switch_mm_irqs_off(prev, next, current);
 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
-	__ctl_load(S390_lowcore.user_asce, 7, 7);
+	if (test_thread_flag(TIF_ASCE_PRIMARY))
+		local_ctl_load(1, &get_lowcore()->kernel_asce);
+	else
+		local_ctl_load(1, &get_lowcore()->user_asce);
+	local_ctl_load(7, &get_lowcore()->user_asce);
 }
 
 #include <asm-generic/mmu_context.h>
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
deleted file mode 100644
index 73e3e7c6976c..000000000000
--- a/arch/s390/include/asm/mmzone.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Copyright IBM Corp. 2015
- */
-
-#ifndef _ASM_S390_MMZONE_H
-#define _ASM_S390_MMZONE_H
-
-#ifdef CONFIG_NUMA
-
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif /* CONFIG_NUMA */
-#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
index 9f1eea15872c..916ab59e458a 100644
--- a/arch/s390/include/asm/module.h
+++ b/arch/s390/include/asm/module.h
@@ -38,4 +38,18 @@ struct mod_arch_specific {
 #endif /* CONFIG_FUNCTION_TRACER */
 };
 
+static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+					   const Elf_Shdr *sechdrs,
+					   const char *name)
+{
+	/* Section-name string table, found via the header's e_shstrndx slot. */
+	const char *strtab = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	unsigned int idx;
+
+	for (idx = 0; idx < hdr->e_shnum; idx++)
+		if (!strcmp(name, strtab + sechdrs[idx].sh_name))
+			return &sechdrs[idx];
+	return NULL;
+}
+
 #endif /* _ASM_S390_MODULE_H */
diff --git a/arch/s390/include/asm/msi.h b/arch/s390/include/asm/msi.h
new file mode 100644
index 000000000000..399343ed9ffb
--- /dev/null
+++ b/arch/s390/include/asm/msi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MSI_H
+#define _ASM_S390_MSI_H
+#include <asm-generic/msi.h>
+
+/*
+ * Work around S390 not using irq_domain at all, which means we can't set
+ * IRQ_DOMAIN_FLAG_ISOLATED_MSI. For an explanation of how it works, see:
+ *
+ * https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/
+ *
+ * Note this is less isolated than the ARM/x86 versions as userspace can trigger
+ * MSI belonging to kernel devices within the same gisa.
+ */
+#define arch_is_isolated_msi() true
+
+#endif
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index af1cd3a6f406..227466ce9e41 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -101,9 +101,8 @@ void nmi_alloc_mcesa_early(u64 *mcesad);
 int nmi_alloc_mcesa(u64 *mcesad);
 void nmi_free_mcesa(u64 *mcesad);
 
-void s390_handle_mcck(struct pt_regs *regs);
-void __s390_handle_mcck(void);
-int s390_do_machine_check(struct pt_regs *regs);
+void s390_handle_mcck(void);
+void s390_do_machine_check(struct pt_regs *regs);
 
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 82725cf783c7..c7c96282f011 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -5,8 +5,17 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/facility.h>
 
 extern int nospec_disable;
+extern int nobp;
+
+static inline bool nobp_enabled(void)
+{
+	if (!__is_defined(__DECOMPRESSOR))
+		return nobp && test_facility(82);
+	return false;	/* never enabled when built for the decompressor */
+}
 
 void nospec_init_branches(void);
 void nospec_auto_detect(void);
@@ -17,6 +26,22 @@ static inline bool nospec_uses_trampoline(void)
 	return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
 }
 
+void __s390_indirect_jump_r1(void);
+void __s390_indirect_jump_r2(void);
+void __s390_indirect_jump_r3(void);
+void __s390_indirect_jump_r4(void);
+void __s390_indirect_jump_r5(void);
+void __s390_indirect_jump_r6(void);
+void __s390_indirect_jump_r7(void);
+void __s390_indirect_jump_r8(void);
+void __s390_indirect_jump_r9(void);
+void __s390_indirect_jump_r10(void);
+void __s390_indirect_jump_r11(void);
+void __s390_indirect_jump_r12(void);
+void __s390_indirect_jump_r13(void);
+void __s390_indirect_jump_r14(void);
+void __s390_indirect_jump_r15(void);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 7e9e99523e95..cb15dd25bf21 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_S390_NOSPEC_ASM_H
 #define _ASM_S390_NOSPEC_ASM_H
 
+#include <linux/linkage.h>
 #include <asm/dwarf.h>
 
 #ifdef __ASSEMBLY__
@@ -15,24 +16,25 @@
  */
 	.macro __THUNK_PROLOG_NAME name
 #ifdef CONFIG_EXPOLINE_EXTERN
-	.pushsection .text,"ax",@progbits
-	.align 16,0x07
+	SYM_CODE_START(\name)
 #else
 	.pushsection .text.\name,"axG",@progbits,\name,comdat
-#endif
 	.globl \name
 	.hidden \name
 	.type \name,@function
 \name:
 	CFI_STARTPROC
+#endif
 	.endm
 
 	.macro __THUNK_EPILOG_NAME name
-	CFI_ENDPROC
 #ifdef CONFIG_EXPOLINE_EXTERN
-	.size \name, .-\name
-#endif
+	SYM_CODE_END(\name)
+	EXPORT_SYMBOL(\name)
+#else
+	CFI_ENDPROC
 	.popsection
+#endif
 	.endm
 
 	.macro __THUNK_PROLOG_BR r1
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 147a8d547ef9..3ee9e8f5ceae 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -8,15 +8,34 @@
 #ifndef _ASM_S390_OS_INFO_H
 #define _ASM_S390_OS_INFO_H
 
+#include <linux/uio.h>
+
 #define OS_INFO_VERSION_MAJOR	1
 #define OS_INFO_VERSION_MINOR	1
 #define OS_INFO_MAGIC		0x4f53494e464f535aULL /* OSINFOSZ */
 
 #define OS_INFO_VMCOREINFO	0
 #define OS_INFO_REIPL_BLOCK	1
+#define OS_INFO_FLAGS_ENTRY	2
+#define OS_INFO_RESERVED	3
+#define OS_INFO_IDENTITY_BASE	4
+#define OS_INFO_KASLR_OFFSET	5
+#define OS_INFO_KASLR_OFF_PHYS	6
+#define OS_INFO_VMEMMAP		7
+#define OS_INFO_AMODE31_START	8
+#define OS_INFO_AMODE31_END	9
+#define OS_INFO_IMAGE_START	10
+#define OS_INFO_IMAGE_END	11
+#define OS_INFO_IMAGE_PHYS	12
+#define OS_INFO_MAX		13
+
+#define OS_INFO_FLAG_REIPL_CLEAR	(1UL << 0)
 
 struct os_info_entry {
-	u64	addr;
+	union {
+		u64	addr;
+		u64	val;
+	};
 	u64	size;
 	u32	csum;
 } __packed;
@@ -28,18 +47,24 @@ struct os_info {
 	u16	version_minor;
 	u64	crashkernel_addr;
 	u64	crashkernel_size;
-	struct os_info_entry entry[2];
-	u8	reserved[4024];
+	struct os_info_entry entry[OS_INFO_MAX];
+	u8	reserved[3804];
 } __packed;
 
 void os_info_init(void);
-void os_info_entry_add(int nr, void *ptr, u64 len);
+void os_info_entry_add_data(int nr, void *ptr, u64 len);
+void os_info_entry_add_val(int nr, u64 val);
 void os_info_crashkernel_add(unsigned long base, unsigned long size);
 u32 os_info_csum(struct os_info *os_info);
 
 #ifdef CONFIG_CRASH_DUMP
 void *os_info_old_entry(int nr, unsigned long *size);
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+static inline unsigned long os_info_old_value(int nr)
+{
+	unsigned long size;
+
+	return (unsigned long)os_info_old_entry(nr, &size);
+}
 #else
 static inline void *os_info_old_entry(int nr, unsigned long *size)
 {
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index c33c4deb545f..794fdb21500a 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -7,6 +7,8 @@
 #ifndef PAGE_STATES_H
 #define PAGE_STATES_H
 
+#include <asm/page.h>
+
 #define ESSA_GET_STATE			0
 #define ESSA_SET_STABLE			1
 #define ESSA_SET_UNUSED			2
@@ -18,4 +20,60 @@
 
 #define ESSA_MAX	ESSA_SET_STABLE_NODAT
 
+extern int cmma_flag;
+
+static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
+{
+	unsigned long rc;
+
+	asm volatile(
+		"	.insn	rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0"
+		: [rc] "=d" (rc)
+		: [paddr] "d" (paddr),
+		  [cmd] "i" (cmd));
+	return rc;
+}
+
+static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd)
+{
+	unsigned long paddr = __pa(addr) & PAGE_MASK;
+
+	while (num_pages--) {
+		essa(paddr, cmd);
+		paddr += PAGE_SIZE;
+	}
+}
+
+static inline void __set_page_unused(void *addr, unsigned long num_pages)
+{
+	__set_page_state(addr, num_pages, ESSA_SET_UNUSED);
+}
+
+static inline void __set_page_stable_dat(void *addr, unsigned long num_pages)
+{
+	__set_page_state(addr, num_pages, ESSA_SET_STABLE);
+}
+
+static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages)
+{
+	__set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT);
+}
+
+static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages)
+{
+	if (!cmma_flag)
+		return;
+	if (cmma_flag < 2)
+		__set_page_stable_dat(addr, num_pages);
+	else
+		__set_page_stable_nodat(addr, num_pages);
+}
+
+static inline void __arch_set_page_dat(void *addr, unsigned long num_pages)
+{
+	if (!cmma_flag)
+		return;
+	__set_page_stable_dat(addr, num_pages);
+}
+
 #endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 61dea67bb9c7..4e5dbabdf202 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -10,16 +10,11 @@
 
 #include <linux/const.h>
 #include <asm/types.h>
+#include <asm/asm.h>
 
-#define _PAGE_SHIFT	12
-#define _PAGE_SIZE	(_AC(1, UL) << _PAGE_SHIFT)
-#define _PAGE_MASK	(~(_PAGE_SIZE - 1))
+#include <vdso/page.h>
 
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT	_PAGE_SHIFT
-#define PAGE_SIZE	_PAGE_SIZE
-#define PAGE_MASK	_PAGE_MASK
-#define PAGE_DEFAULT_ACC	0
+#define PAGE_DEFAULT_ACC	_AC(0, UL)
 /* storage-protection override */
 #define PAGE_SPO_ACC		9
 #define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
@@ -73,13 +68,14 @@ static inline void copy_page(void *to, void *from)
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
+#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
+	vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
 
-/*
- * These are used to make use of C type-checking..
- */
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+#define STRICT_MM_TYPECHECKS
+#endif
+
+#ifdef STRICT_MM_TYPECHECKS
 
 typedef struct { unsigned long pgprot; } pgprot_t;
 typedef struct { unsigned long pgste; } pgste_t;
@@ -88,43 +84,48 @@ typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pud; } pud_t;
 typedef struct { unsigned long p4d; } p4d_t;
 typedef struct { unsigned long pgd; } pgd_t;
-typedef pte_t *pgtable_t;
-
-#define pgprot_val(x)	((x).pgprot)
-#define pgste_val(x)	((x).pgste)
 
-static inline unsigned long pte_val(pte_t pte)
-{
-	return pte.pte;
+#define DEFINE_PGVAL_FUNC(name)						\
+static __always_inline unsigned long name ## _val(name ## _t name)	\
+{									\
+	return name.name;						\
 }
 
-static inline unsigned long pmd_val(pmd_t pmd)
-{
-	return pmd.pmd;
-}
+#else /* STRICT_MM_TYPECHECKS */
 
-static inline unsigned long pud_val(pud_t pud)
-{
-	return pud.pud;
-}
+typedef unsigned long pgprot_t;
+typedef unsigned long pgste_t;
+typedef unsigned long pte_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long p4d_t;
+typedef unsigned long pgd_t;
 
-static inline unsigned long p4d_val(p4d_t p4d)
-{
-	return p4d.p4d;
+#define DEFINE_PGVAL_FUNC(name)						\
+static __always_inline unsigned long name ## _val(name ## _t name)	\
+{									\
+	return name;							\
 }
 
-static inline unsigned long pgd_val(pgd_t pgd)
-{
-	return pgd.pgd;
-}
+#endif /* STRICT_MM_TYPECHECKS */
+
+DEFINE_PGVAL_FUNC(pgprot)
+DEFINE_PGVAL_FUNC(pgste)
+DEFINE_PGVAL_FUNC(pte)
+DEFINE_PGVAL_FUNC(pmd)
+DEFINE_PGVAL_FUNC(pud)
+DEFINE_PGVAL_FUNC(p4d)
+DEFINE_PGVAL_FUNC(pgd)
+
+typedef pte_t *pgtable_t;
 
+#define __pgprot(x)	((pgprot_t) { (x) } )
 #define __pgste(x)	((pgste_t) { (x) } )
 #define __pte(x)        ((pte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
 #define __pud(x)	((pud_t) { (x) } )
 #define __p4d(x)	((p4d_t) { (x) } )
 #define __pgd(x)        ((pgd_t) { (x) } )
-#define __pgprot(x)     ((pgprot_t) { (x) } )
 
 static inline void page_set_storage_key(unsigned long addr,
 					unsigned char skey, int mapped)
@@ -149,11 +150,12 @@ static inline int page_reset_referenced(unsigned long addr)
 	int cc;
 
 	asm volatile(
-		"	rrbe	0,%1\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (cc) : "a" (addr) : "cc");
-	return cc;
+		"	rrbe	0,%[addr]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		: [addr] "a" (addr)
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
 }
 
 /* Bits int the storage key */
@@ -163,9 +165,9 @@ static inline int page_reset_referenced(unsigned long addr)
 #define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
 
 struct page;
+struct folio;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
-void arch_set_page_dat(struct page *page, int order);
 
 static inline int devmem_is_allowed(unsigned long pfn)
 {
@@ -175,37 +177,114 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define HAVE_ARCH_FREE_PAGE
 #define HAVE_ARCH_ALLOC_PAGE
 
-#if IS_ENABLED(CONFIG_PGSTE)
-int arch_make_page_accessible(struct page *page);
-#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+int arch_make_folio_accessible(struct folio *folio);
+#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
+
+struct vm_layout {
+	unsigned long kaslr_offset;
+	unsigned long kaslr_offset_phys;
+	unsigned long identity_base;
+	unsigned long identity_size;
+};
+
+extern struct vm_layout vm_layout;
+
+#define __kaslr_offset		vm_layout.kaslr_offset
+#define __kaslr_offset_phys	vm_layout.kaslr_offset_phys
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
+#define __identity_base		vm_layout.identity_base
+#else
+#define __identity_base		0UL
 #endif
+#define ident_map_size		vm_layout.identity_size
 
-#endif /* !__ASSEMBLY__ */
+static inline unsigned long kaslr_offset(void)
+{
+	return __kaslr_offset;
+}
+
+extern int __kaslr_enabled;
+static inline int kaslr_enabled(void)
+{
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+		return __kaslr_enabled;
+	return 0;
+}
+
+#define __PAGE_OFFSET		__identity_base
+#define PAGE_OFFSET		__PAGE_OFFSET
 
-#define __PAGE_OFFSET		0x0UL
-#define PAGE_OFFSET		0x0UL
+#ifdef __DECOMPRESSOR
 
-#define __pa(x)			((unsigned long)(x))
+#define __pa_nodebug(x)		((unsigned long)(x))
+#define __pa(x)			__pa_nodebug(x)
+#define __pa32(x)		__pa(x)
 #define __va(x)			((void *)(unsigned long)(x))
 
+#else /* __DECOMPRESSOR */
+
+static inline unsigned long __pa_nodebug(unsigned long x)
+{
+	if (x < __kaslr_offset)
+		return x - __identity_base;
+	return x - __kaslr_offset + __kaslr_offset_phys;
+}
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit);
+
+#else /* CONFIG_DEBUG_VIRTUAL */
+
+static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+	return __pa_nodebug(x);
+}
+
+#endif /* CONFIG_DEBUG_VIRTUAL */
+
+#define __pa(x)			__phys_addr((unsigned long)(x), false)
+#define __pa32(x)		__phys_addr((unsigned long)(x), true)
+#define __va(x)			((void *)((unsigned long)(x) + __identity_base))
+
+#endif /* __DECOMPRESSOR */
+
 #define phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
 #define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
 
-#define phys_to_page(phys)	pfn_to_page(phys_to_pfn(phys))
-#define page_to_phys(page)	pfn_to_phys(page_to_pfn(page))
+#define phys_to_folio(phys)	page_folio(phys_to_page(phys))
+#define folio_to_phys(folio)	pfn_to_phys(folio_pfn(folio))
+
+static inline void *pfn_to_virt(unsigned long pfn)
+{
+	return __va(pfn_to_phys(pfn));
+}
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+	return phys_to_pfn(__pa(kaddr));
+}
 
-#define pfn_to_virt(pfn)	__va(pfn_to_phys(pfn))
-#define virt_to_pfn(kaddr)	(phys_to_pfn(__pa(kaddr)))
 #define pfn_to_kaddr(pfn)	pfn_to_virt(pfn)
 
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
-#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(kaddr)	pfn_valid(phys_to_pfn(__pa_nodebug((unsigned long)(kaddr))))
 
 #define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
 
+#endif /* !__ASSEMBLY__ */
+
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
 
+#define AMODE31_SIZE		(3 * PAGE_SIZE)
+
+#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+#define __NO_KASLR_START_KERNEL	CONFIG_KERNEL_IMAGE_BASE
+#define __NO_KASLR_END_KERNEL	(__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
+
+#define TEXT_OFFSET		0x100000
+
 #endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 5b7e33ac6f0b..ebeabd0aaa51 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -11,13 +11,16 @@
 #include <linux/jump_label.h>
 #include <asm/lowcore.h>
 #include <asm/ptrace.h>
+#include <asm/asm.h>
 
 struct qpaci_info_block {
 	u64 header;
 	struct {
 		u64 : 8;
-		u64 num_cc : 8;	/* # of supported crypto counters */
-		u64 : 48;
+		u64 num_cc : 8;		/* # of supported crypto counters */
+		u64 : 9;
+		u64 num_nnpa : 7;	/* # of supported NNPA counters */
+		u64 : 32;
 	};
 };
 
@@ -31,17 +34,18 @@ static inline int qpaci(struct qpaci_info_block *info)
 		"	lgr	0,%[size]\n"
 		"	.insn	s,0xb28f0000,%[info]\n"
 		"	lgr	%[size],0\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=d" (cc), [info] "=Q" (*info), [size] "+&d" (size)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [info] "=Q" (*info), [size] "+&d" (size)
 		:
-		: "0", "cc", "memory");
-	return cc ? (size + 1) * sizeof(u64) : 0;
+		: CC_CLOBBER_LIST("0", "memory"));
+	return CC_TRANSFORM(cc) ? (size + 1) * sizeof(u64) : 0;
 }
 
 #define PAI_CRYPTO_BASE			0x1000	/* First event number */
 #define PAI_CRYPTO_MAXCTR		256	/* Max # of event counters */
 #define PAI_CRYPTO_KERNEL_OFFSET	2048
+#define PAI_NNPA_BASE			0x1800	/* First event number */
+#define PAI_NNPA_MAXCTR			128	/* Max # of event counters */
 
 DECLARE_STATIC_KEY_FALSE(pai_key);
 
@@ -51,11 +55,11 @@ static __always_inline void pai_kernel_enter(struct pt_regs *regs)
 		return;
 	if (!static_branch_unlikely(&pai_key))
 		return;
-	if (!S390_lowcore.ccd)
+	if (!get_lowcore()->ccd)
 		return;
 	if (!user_mode(regs))
 		return;
-	WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+	WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd | PAI_CRYPTO_KERNEL_OFFSET);
 }
 
 static __always_inline void pai_kernel_exit(struct pt_regs *regs)
@@ -64,11 +68,15 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs)
 		return;
 	if (!static_branch_unlikely(&pai_key))
 		return;
-	if (!S390_lowcore.ccd)
+	if (!get_lowcore()->ccd)
 		return;
 	if (!user_mode(regs))
 		return;
-	WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+	WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
 }
 
+#define PAI_SAVE_AREA(x)	((x)->hw.event_base)
+#define PAI_CPU_MASK(x)		((x)->hw.addr_filters)
+#define PAI_SWLIST(x)		(&(x)->hw.tp_list)
+
 #endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index fdb9745ee998..41f900f693d9 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,11 +6,14 @@
 #include <linux/mutex.h>
 #include <linux/iommu.h>
 #include <linux/pci_hotplug.h>
-#include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
+#include <asm/pci_insn.h>
 #include <asm/sclp.h>
 
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
+#define arch_can_pci_mmap_wc()		1
+
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
 
@@ -96,7 +99,7 @@ struct zpci_bar_struct {
 	u8		size;		/* order 2 exponent */
 };
 
-struct s390_domain;
+struct kvm_zdev;
 
 #define ZPCI_FUNCTIONS_PER_BUS 256
 struct zpci_bus {
@@ -106,9 +109,10 @@ struct zpci_bus {
 	struct list_head	resources;
 	struct list_head	bus_next;
 	struct resource		bus_resource;
-	int			pchid;
+	int			topo;		/* TID if topo_is_tid, PCHID otherwise */
 	int			domain_nr;
-	bool			multifunction;
+	u8			multifunction	: 1;
+	u8			topo_is_tid	: 1;
 	enum pci_bus_speed	max_bus_speed;
 };
 
@@ -116,28 +120,36 @@ struct zpci_bus {
 struct zpci_dev {
 	struct zpci_bus *zbus;
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
-	struct list_head bus_next;
+	struct list_head iommu_list;
 	struct kref kref;
+	struct rcu_head rcu;
 	struct hotplug_slot hotplug_slot;
 
+	struct mutex state_lock;	/* protect state changes */
 	enum zpci_state state;
 	u32		fid;		/* function ID, used by sclp */
 	u32		fh;		/* function handle, used by insn's */
+	u32		gisa;		/* GISA designation for passthrough */
 	u16		vfn;		/* virtual function number */
 	u16		pchid;		/* physical channel ID */
+	u16		maxstbl;	/* Maximum store block size */
+	u16		rid;		/* RID as supplied by firmware */
+	u16		tid;		/* Topology for which RID is valid */
 	u8		pfgid;		/* function group ID */
 	u8		pft;		/* pci function type */
 	u8		port;
+	u8		fidparm;
+	u8		dtsm;		/* Supported DT mask */
 	u8		rid_available	: 1;
 	u8		has_hp_slot	: 1;
 	u8		has_resources	: 1;
 	u8		is_physfn	: 1;
 	u8		util_str_avail	: 1;
 	u8		irqs_registered	: 1;
-	u8		reserved	: 2;
+	u8		tid_avail	: 1;
+	u8		rtr_avail	: 1; /* Relaxed translation allowed */
 	unsigned int	devfn;		/* DEVFN part of the RID*/
 
-	struct mutex lock;
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
 	u32 uid;			/* user defined id */
 	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
@@ -152,16 +164,8 @@ struct zpci_dev {
 
 	/* DMA stuff */
 	unsigned long	*dma_table;
-	spinlock_t	dma_table_lock;
 	int		tlb_refresh;
 
-	spinlock_t	iommu_bitmap_lock;
-	unsigned long	*iommu_bitmap;
-	unsigned long	*lazy_bitmap;
-	unsigned long	iommu_size;
-	unsigned long	iommu_pages;
-	unsigned int	next_bit;
-
 	struct iommu_device iommu_dev;  /* IOMMU core handle */
 
 	char res_name[16];
@@ -173,20 +177,21 @@ struct zpci_dev {
 	u64		dma_mask;	/* DMA address space mask */
 
 	/* Function measurement block */
+	struct mutex fmb_lock;
 	struct zpci_fmb *fmb;
 	u16		fmb_update;	/* update interval */
 	u16		fmb_length;
-	/* software counters */
-	atomic64_t allocated_pages;
-	atomic64_t mapped_pages;
-	atomic64_t unmapped_pages;
 
 	u8		version;
 	enum pci_bus_speed max_bus_speed;
 
 	struct dentry	*debugfs_dev;
 
-	struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+	/* IOMMU and passthrough */
+	struct iommu_domain *s390_domain; /* attached IOMMU domain */
+	struct kvm_zdev *kzdev;
+	struct mutex kzdev_lock;
+	spinlock_t dom_lock;		/* protect s390_domain change */
 };
 
 static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -194,31 +199,44 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
 	return (zdev->fh & (1UL << 31)) ? true : false;
 }
 
-extern const struct attribute_group *zpci_attr_groups[];
+extern const struct attribute_group zpci_attr_group;
+extern const struct attribute_group pfip_attr_group;
+extern const struct attribute_group zpci_ident_attr_group;
+
+#define ARCH_PCI_DEV_GROUPS &zpci_attr_group,		 \
+			    &pfip_attr_group,		 \
+			    &zpci_ident_attr_group,
+
 extern unsigned int s390_pci_force_floating __initdata;
 extern unsigned int s390_pci_no_rid;
 
+extern union zpci_sic_iib *zpci_aipb;
+extern struct airq_iv *zpci_aif_sbv;
+
 /* -----------------------------------------------------------------------------
   Prototypes
 ----------------------------------------------------------------------------- */
 /* Base stuff */
 struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
+int zpci_add_device(struct zpci_dev *zdev);
 int zpci_enable_device(struct zpci_dev *);
+int zpci_reenable_device(struct zpci_dev *zdev);
 int zpci_disable_device(struct zpci_dev *);
 int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
 int zpci_deconfigure_device(struct zpci_dev *zdev);
 void zpci_device_reserved(struct zpci_dev *zdev);
 bool zpci_is_device_configured(struct zpci_dev *zdev);
+int zpci_scan_devices(void);
 
 int zpci_hot_reset_device(struct zpci_dev *zdev);
-int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 void zpci_remove_reserved_devices(void);
 void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
 
 /* CLP */
 int clp_setup_writeback_mio(void);
-int clp_scan_pci_devices(void);
+int clp_scan_pci_devices(struct list_head *scan_list);
 int clp_query_pci_fn(struct zpci_dev *zdev);
 int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as);
 int clp_disable_fh(struct zpci_dev *zdev, u32 *fh);
@@ -231,6 +249,7 @@ void update_uid_checking(bool new);
 /* IOMMU Interface */
 int zpci_init_iommu(struct zpci_dev *zdev);
 void zpci_destroy_iommu(struct zpci_dev *zdev);
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status);
 
 #ifdef CONFIG_PCI
 static inline bool zpci_use_mio(struct zpci_dev *zdev)
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 1f4b666e85ee..7ebff39c84b3 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -50,6 +50,9 @@ struct clp_fh_list_entry {
 #define CLP_UTIL_STR_LEN	64
 #define CLP_PFIP_NR_SEGMENTS	4
 
+/* PCI function type numbers */
+#define PCI_FUNC_TYPE_ISM	0x5	/* ISM device */
+
 extern bool zpci_unique_uid;
 
 struct clp_rsp_slpc_pci {
@@ -107,7 +110,8 @@ struct clp_req_query_pci {
 struct clp_rsp_query_pci {
 	struct clp_rsp_hdr hdr;
 	u16 vfn;			/* virtual fn number */
-	u16			:  3;
+	u16			:  2;
+	u16 tid_avail		:  1;
 	u16 rid_avail		:  1;
 	u16 is_physfn		:  1;
 	u16 reserved1		:  1;
@@ -119,16 +123,18 @@ struct clp_rsp_query_pci {
 	u16 pchid;
 	__le32 bar[PCI_STD_NUM_BARS];
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
-	u16			: 12;
-	u16 port		:  4;
+	u8 fidparm;
+	u8 reserved3		:  4;
+	u8 port			:  4;
 	u8 fmb_len;
 	u8 pft;				/* pci function type */
 	u64 sdma;			/* start dma as */
 	u64 edma;			/* end dma as */
 #define ZPCI_RID_MASK_DEVFN 0x00ff
 	u16 rid;			/* BUS/DEVFN PCI address */
-	u16 reserved0;
-	u32 reserved[10];
+	u32 reserved0;
+	u16 tid;
+	u32 reserved[9];
 	u32 uid;			/* user defined id */
 	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
 	u32 reserved2[16];
@@ -150,12 +156,16 @@ struct clp_rsp_query_pci_grp {
 	u16			:  4;
 	u16 noi			: 12;	/* number of interrupts */
 	u8 version;
-	u8			:  6;
+	u8			:  2;
+	u8 rtr			:  1;	/* Relaxed translation requirement */
+	u8			:  3;
 	u8 frame		:  1;
 	u8 refresh		:  1;	/* TLB refresh mode */
-	u16 reserved2;
+	u16			:  3;
+	u16 maxstbl		: 13;	/* Maximum store block size */
 	u16 mui;
-	u16			: 16;
+	u8 dtsm;			/* Supported DT mask */
+	u8 reserved3;
 	u16 maxfaal;
 	u16			:  4;
 	u16 dnoi		: 12;
@@ -173,7 +183,8 @@ struct clp_req_set_pci {
 	u16 reserved2;
 	u8 oc;				/* operation controls */
 	u8 ndas;			/* number of dma spaces */
-	u64 reserved3;
+	u32 reserved3;
+	u32 gisa;			/* GISA designation */
 } __packed;
 
 /* Set PCI function response */
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
 #define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
 
 #define ZPCI_TABLE_SIZE_RT	(1UL << 42)
+#define ZPCI_TABLE_SIZE_RS	(1UL << 53)
 
 #define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
 #define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
@@ -55,6 +56,8 @@ enum zpci_ioat_dtype {
 #define ZPCI_PT_BITS			8
 #define ZPCI_ST_SHIFT			(ZPCI_PT_BITS + PAGE_SHIFT)
 #define ZPCI_RT_SHIFT			(ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RS_SHIFT			(ZPCI_RT_SHIFT + ZPCI_TABLE_BITS)
+#define ZPCI_RF_SHIFT			(ZPCI_RS_SHIFT + ZPCI_TABLE_BITS)
 
 #define ZPCI_RTE_FLAG_MASK		0x3fffUL
 #define ZPCI_RTE_ADDR_MASK		(~ZPCI_RTE_FLAG_MASK)
@@ -82,116 +85,16 @@ enum zpci_ioat_dtype {
 #define ZPCI_TABLE_VALID_MASK		0x20
 #define ZPCI_TABLE_PROT_MASK		0x200
 
-static inline unsigned int calc_rtx(dma_addr_t ptr)
-{
-	return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_sx(dma_addr_t ptr)
-{
-	return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_px(dma_addr_t ptr)
-{
-	return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
-}
-
-static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
-{
-	*entry &= ZPCI_PTE_FLAG_MASK;
-	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
-}
-
-static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
-{
-	*entry &= ZPCI_RTE_FLAG_MASK;
-	*entry |= (sto & ZPCI_RTE_ADDR_MASK);
-	*entry |= ZPCI_TABLE_TYPE_RTX;
-}
-
-static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
-{
-	*entry &= ZPCI_STE_FLAG_MASK;
-	*entry |= (pto & ZPCI_STE_ADDR_MASK);
-	*entry |= ZPCI_TABLE_TYPE_SX;
-}
-
-static inline void validate_rt_entry(unsigned long *entry)
-{
-	*entry &= ~ZPCI_TABLE_VALID_MASK;
-	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
-	*entry |= ZPCI_TABLE_VALID;
-	*entry |= ZPCI_TABLE_LEN_RTX;
-}
-
-static inline void validate_st_entry(unsigned long *entry)
-{
-	*entry &= ~ZPCI_TABLE_VALID_MASK;
-	*entry |= ZPCI_TABLE_VALID;
-}
-
-static inline void invalidate_pt_entry(unsigned long *entry)
-{
-	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
-	*entry &= ~ZPCI_PTE_VALID_MASK;
-	*entry |= ZPCI_PTE_INVALID;
-}
-
-static inline void validate_pt_entry(unsigned long *entry)
-{
-	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
-	*entry &= ~ZPCI_PTE_VALID_MASK;
-	*entry |= ZPCI_PTE_VALID;
-}
-
-static inline void entry_set_protected(unsigned long *entry)
-{
-	*entry &= ~ZPCI_TABLE_PROT_MASK;
-	*entry |= ZPCI_TABLE_PROTECTED;
-}
-
-static inline void entry_clr_protected(unsigned long *entry)
-{
-	*entry &= ~ZPCI_TABLE_PROT_MASK;
-	*entry |= ZPCI_TABLE_UNPROTECTED;
-}
-
-static inline int reg_entry_isvalid(unsigned long entry)
-{
-	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
-}
-
-static inline int pt_entry_isvalid(unsigned long entry)
-{
-	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
-}
-
-static inline unsigned long *get_rt_sto(unsigned long entry)
-{
-	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
-		return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
-	else
-		return NULL;
-
-}
-
-static inline unsigned long *get_st_pto(unsigned long entry)
-{
-	if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
-		return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
-	else
-		return NULL;
-}
-
-/* Prototypes */
-void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(void);
-void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
-
-extern const struct dma_map_ops s390_pci_dma_ops;
+struct zpci_iommu_ctrs {
+	atomic64_t		mapped_pages;
+	atomic64_t		unmapped_pages;
+	atomic64_t		global_rpcits;
+	atomic64_t		sync_map_rpcits;
+	atomic64_t		sync_rpcits;
+};
+
+struct zpci_dev;
 
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev);
 
 #endif
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 61cf9531f68f..e5f57cfe1d45 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -98,6 +98,15 @@ struct zpci_fib {
 	u32 gd;
 } __packed __aligned(8);
 
+/* Set Interruption Controls Operation Controls  */
+#define	SIC_IRQ_MODE_ALL		0
+#define	SIC_IRQ_MODE_SINGLE		1
+#define	SIC_SET_AENI_CONTROLS		2
+#define	SIC_IRQ_MODE_DIRECT		4
+#define	SIC_IRQ_MODE_D_ALL		16
+#define	SIC_IRQ_MODE_D_SINGLE		17
+#define	SIC_IRQ_MODE_SET_CPU		18
+
 /* directed interruption information block */
 struct zpci_diib {
 	u32 : 1;
@@ -119,9 +128,20 @@ struct zpci_cdiib {
 	u64 : 64;
 } __packed __aligned(8);
 
+/* adapter interruption parameters block */
+struct zpci_aipb {
+	u64 faisb;
+	u64 gait;
+	u16 : 13;
+	u16 afi : 3;
+	u32 : 32;
+	u16 faal;
+} __packed __aligned(8);
+
 union zpci_sic_iib {
 	struct zpci_diib diib;
 	struct zpci_cdiib cdiib;
+	struct zpci_aipb aipb;
 };
 
 DECLARE_STATIC_KEY_FALSE(have_mio);
@@ -134,13 +154,6 @@ int __zpci_store(u64 data, u64 req, u64 offset);
 int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
 int __zpci_store_block(const u64 *data, u64 req, u64 offset);
 void zpci_barrier(void);
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
-
-static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
-{
-	union zpci_sic_iib iib = {{0}};
-
-	return __zpci_set_irq_ctrl(ctl, isc, &iib);
-}
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
 
 #endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 287bb88f7698..43a5ea4ee20f 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -11,6 +11,8 @@
 /* I/O size constraints */
 #define ZPCI_MAX_READ_SIZE	8
 #define ZPCI_MAX_WRITE_SIZE	128
+#define ZPCI_BOUNDARY_SIZE	(1 << 12)
+#define ZPCI_BOUNDARY_MASK	(ZPCI_BOUNDARY_SIZE - 1)
 
 /* I/O Map */
 #define ZPCI_IOMAP_SHIFT		48
@@ -125,28 +127,30 @@ out:
 int zpci_write_block(volatile void __iomem *dst, const void *src,
 		     unsigned long len);
 
-static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
+static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max)
 {
-	int count = len > max ? max : len, size = 1;
+	int offset = dst & ZPCI_BOUNDARY_MASK;
+	int size;
 
-	while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) {
-		dst = dst >> 1;
-		src = src >> 1;
-		size = size << 1;
-	}
-	return size;
+	size = min3(len, ZPCI_BOUNDARY_SIZE - offset, max);
+	if (IS_ALIGNED(src, 8) && IS_ALIGNED(dst, 8) && IS_ALIGNED(size, 8))
+		return size;
+
+	if (size >= 8)
+		return 8;
+	return rounddown_pow_of_two(size);
 }
 
 static inline int zpci_memcpy_fromio(void *dst,
 				     const volatile void __iomem *src,
-				     unsigned long n)
+				     size_t n)
 {
 	int size, rc = 0;
 
 	while (n > 0) {
-		size = zpci_get_max_write_size((u64 __force) src,
-					       (u64) dst, n,
-					       ZPCI_MAX_READ_SIZE);
+		size = zpci_get_max_io_size((u64 __force) src,
+					    (u64) dst, n,
+					    ZPCI_MAX_READ_SIZE);
 		rc = zpci_read_single(dst, src, size);
 		if (rc)
 			break;
@@ -158,7 +162,7 @@ static inline int zpci_memcpy_fromio(void *dst,
 }
 
 static inline int zpci_memcpy_toio(volatile void __iomem *dst,
-				   const void *src, unsigned long n)
+				   const void *src, size_t n)
 {
 	int size, rc = 0;
 
@@ -166,9 +170,9 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
 		return -EINVAL;
 
 	while (n > 0) {
-		size = zpci_get_max_write_size((u64 __force) dst,
-					       (u64) src, n,
-					       ZPCI_MAX_WRITE_SIZE);
+		size = zpci_get_max_io_size((u64 __force) dst,
+					    (u64) src, n,
+					    ZPCI_MAX_WRITE_SIZE);
 		if (size > 8) /* main path */
 			rc = zpci_write_block(dst, src, size);
 		else
@@ -183,7 +187,7 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
 }
 
 static inline int zpci_memset_io(volatile void __iomem *dst,
-				 unsigned char val, size_t count)
+				 int val, size_t count)
 {
 	u8 *src = kmalloc(count, GFP_KERNEL);
 	int rc;
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index cb5fc0690435..84f6b8357b45 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -4,12 +4,13 @@
 
 #include <linux/preempt.h>
 #include <asm/cmpxchg.h>
+#include <asm/march.h>
 
 /*
  * s390 uses its own implementation for per cpu data, the offset of
  * the cpu local data area is cached in the cpu's lowcore memory.
  */
-#define __my_cpu_offset S390_lowcore.percpu_offset
+#define __my_cpu_offset get_lowcore()->percpu_offset
 
 /*
  * For 64 bit module code, the module may be more than 4G above the
@@ -31,7 +32,7 @@
 	pcp_op_T__ *ptr__;						\
 	preempt_disable_notrace();					\
 	ptr__ = raw_cpu_ptr(&(pcp));					\
-	prev__ = *ptr__;						\
+	prev__ = READ_ONCE(*ptr__);					\
 	do {								\
 		old__ = prev__;						\
 		new__ = old__ op (val);					\
@@ -50,7 +51,7 @@
 #define this_cpu_or_1(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
 #define this_cpu_or_2(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
 
-#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifndef MARCH_HAS_Z196_FEATURES
 
 #define this_cpu_add_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
 #define this_cpu_add_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
@@ -61,7 +62,7 @@
 #define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
 #define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
 
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#else /* MARCH_HAS_Z196_FEATURES */
 
 #define arch_this_cpu_add(pcp, val, op1, op2, szcast)			\
 {									\
@@ -129,7 +130,7 @@
 #define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op(pcp, val, "lao")
 #define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op(pcp, val, "laog")
 
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#endif /* MARCH_HAS_Z196_FEATURES */
 
 #define arch_this_cpu_cmpxchg(pcp, oval, nval)				\
 ({									\
@@ -148,6 +149,22 @@
 #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
 
+#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, oval, nval)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	u128 old__, new__, ret__;					\
+	pcp_op_T__ *ptr__;						\
+	old__ = oval;							\
+	new__ = nval;							\
+	preempt_disable_notrace();					\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = cmpxchg128((void *)ptr__, old__, new__);		\
+	preempt_enable_notrace();					\
+	ret__;								\
+})
+
 #define arch_this_cpu_xchg(pcp, nval)					\
 ({									\
 	typeof(pcp) *ptr__;						\
@@ -164,24 +181,6 @@
 #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
 #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
 
-#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2)	    \
-({									    \
-	typeof(pcp1) *p1__;						    \
-	typeof(pcp2) *p2__;						    \
-	int ret__;							    \
-									    \
-	preempt_disable_notrace();					    \
-	p1__ = raw_cpu_ptr(&(pcp1));					    \
-	p2__ = raw_cpu_ptr(&(pcp2));					    \
-	ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__,  \
-				 (unsigned long)(o1), (unsigned long)(o2),  \
-				 (unsigned long)(n1), (unsigned long)(n2)); \
-	preempt_enable_notrace();					    \
-	ret__;								    \
-})
-
-#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
-
 #include <asm-generic/percpu.h>
 
 #endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index b9da71632827..e53894cedf08 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -37,9 +37,9 @@ extern ssize_t cpumf_events_sysfs_show(struct device *dev,
 
 /* Perf callbacks */
 struct pt_regs;
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
 #define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
 
 /* Perf pt_regs extension for sample-data-entry indicators */
@@ -48,33 +48,8 @@ struct perf_sf_sde_regs {
 	unsigned long reserved:63;	  /* reserved */
 };
 
-/* Perf PMU definitions for the counter facility */
-#define PERF_CPUM_CF_MAX_CTR		0xffffUL  /* Max ctr for ECCTR */
-
-/* Perf PMU definitions for the sampling facility */
-#define PERF_CPUM_SF_MAX_CTR		2
-#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
-#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
-#define PERF_EVENT_CPUM_CF_DIAG		0xBC000UL /* Event: Counter sets */
-#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
-#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
-#define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
-					 PERF_CPUM_SF_DIAG_MODE)
-#define PERF_CPUM_SF_FULL_BLOCKS	0x0004	  /* Process full SDBs only */
-#define PERF_CPUM_SF_FREQ_MODE		0x0008	  /* Sampling with frequency */
-
-#define REG_NONE		0
-#define REG_OVERFLOW		1
-#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
-#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
-#define TEAR_REG(hwc)		((hwc)->last_tag)
-#define SAMPL_RATE(hwc)		((hwc)->event_base)
-#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
-#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
-#define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
-#define SAMPLE_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
-
 #define perf_arch_fetch_caller_regs(regs, __ip) do {			\
+	(regs)->psw.mask = 0;						\
 	(regs)->psw.addr = (__ip);					\
 	(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) -	\
 		offsetof(struct stack_frame, back_chain);		\
diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h
new file mode 100644
index 000000000000..a1bee4a1e470
--- /dev/null
+++ b/arch/s390/include/asm/pfault.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2023
+ */
+#ifndef _ASM_S390_PFAULT_H
+#define _ASM_S390_PFAULT_H
+
+#include <linux/errno.h>
+
+int __pfault_init(void);
+void __pfault_fini(void);
+
+static inline int pfault_init(void)
+{
+	if (IS_ENABLED(CONFIG_PFAULT))
+		return __pfault_init();
+	return -EOPNOTSUPP;
+}
+
+static inline void pfault_fini(void)
+{
+	if (IS_ENABLED(CONFIG_PFAULT))
+		__pfault_fini();
+}
+
+#endif /* _ASM_S390_PFAULT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 17eb618f1348..5345398df653 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -23,11 +23,9 @@ unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
-struct page *page_table_alloc_pgste(struct mm_struct *mm);
+struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm);
 void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
-void page_table_free_pgste(struct page *page);
-extern int page_table_allocate_pgste;
+void page_table_free_pgste(struct ptdesc *ptdesc);
 
 static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 {
@@ -54,29 +52,42 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long *table = crst_table_alloc(mm);
 
-	if (table)
-		crst_table_init(table, _REGION2_ENTRY_EMPTY);
+	if (!table)
+		return NULL;
+	crst_table_init(table, _REGION2_ENTRY_EMPTY);
+	pagetable_p4d_ctor(virt_to_ptdesc(table));
+
 	return (p4d_t *) table;
 }
 
 static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 {
-	if (!mm_p4d_folded(mm))
-		crst_table_free(mm, (unsigned long *) p4d);
+	if (mm_p4d_folded(mm))
+		return;
+
+	pagetable_dtor(virt_to_ptdesc(p4d));
+	crst_table_free(mm, (unsigned long *) p4d);
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long *table = crst_table_alloc(mm);
-	if (table)
-		crst_table_init(table, _REGION3_ENTRY_EMPTY);
+
+	if (!table)
+		return NULL;
+	crst_table_init(table, _REGION3_ENTRY_EMPTY);
+	pagetable_pud_ctor(virt_to_ptdesc(table));
+
 	return (pud_t *) table;
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	if (!mm_pud_folded(mm))
-		crst_table_free(mm, (unsigned long *) pud);
+	if (mm_pud_folded(mm))
+		return;
+
+	pagetable_dtor(virt_to_ptdesc(pud));
+	crst_table_free(mm, (unsigned long *) pud);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
@@ -86,7 +97,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 	if (!table)
 		return NULL;
 	crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
-	if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+	if (!pagetable_pmd_ctor(mm, virt_to_ptdesc(table))) {
 		crst_table_free(mm, table);
 		return NULL;
 	}
@@ -97,7 +108,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
 	if (mm_pmd_folded(mm))
 		return;
-	pgtable_pmd_page_dtor(virt_to_page(pmd));
+	pagetable_dtor(virt_to_ptdesc(pmd));
 	crst_table_free(mm, (unsigned long *) pmd);
 }
 
@@ -118,11 +129,18 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return (pgd_t *) crst_table_alloc(mm);
+	unsigned long *table = crst_table_alloc(mm);
+
+	if (!table)
+		return NULL;
+	pagetable_pgd_ctor(virt_to_ptdesc(table));
+
+	return (pgd_t *) table;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
+	pagetable_dtor(virt_to_ptdesc(pgd));
 	crst_table_free(mm, (unsigned long *) pgd);
 }
 
@@ -143,6 +161,10 @@ static inline void pmd_populate(struct mm_struct *mm,
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
 
+/* s390 provides its own pte_free_defer(), see arch/s390/mm/pgalloc.c */
+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
 void vmem_map_init(void);
 void *vmem_crst_alloc(unsigned long val);
 pte_t *vmem_pte_alloc(void);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index a397b072a580..6d8bc27a366e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -14,17 +14,19 @@
 
 #include <linux/sched.h>
 #include <linux/mm_types.h>
+#include <linux/cpufeature.h>
 #include <linux/page-flags.h>
 #include <linux/radix-tree.h>
 #include <linux/atomic.h>
-#include <asm/sections.h>
+#include <asm/ctlreg.h>
 #include <asm/bug.h>
 #include <asm/page.h>
 #include <asm/uv.h>
 
 extern pgd_t swapper_pg_dir[];
+extern pgd_t invalid_pg_dir[];
 extern void paging_init(void);
-extern unsigned long s390_invalid_asce;
+extern struct ctlreg s390_invalid_asce;
 
 enum {
 	PG_DIRECT_MAP_4K = 0,
@@ -41,14 +43,12 @@ static inline void update_page_count(int level, long count)
 		atomic_long_add(count, &direct_pages_count[level]);
 }
 
-struct seq_file;
-void arch_report_meminfo(struct seq_file *m);
-
 /*
  * The S390 doesn't have any external MMU info: the kernel page
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep)     do { } while (0)
+#define update_mmu_cache_range(vmf, vma, addr, ptep, nr) do { } while (0)
 #define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
 
 /*
@@ -85,16 +85,14 @@ extern unsigned long zero_page_mask;
  * happen without trampolines and in addition the placement within a
  * 2GB frame is branch prediction unit friendly.
  */
-extern unsigned long __bootdata_preserved(VMALLOC_START);
-extern unsigned long __bootdata_preserved(VMALLOC_END);
+extern unsigned long VMALLOC_START;
+extern unsigned long VMALLOC_END;
 #define VMALLOC_DEFAULT_SIZE	((512UL << 30) - MODULES_LEN)
-extern struct page *__bootdata_preserved(vmemmap);
-extern unsigned long __bootdata_preserved(vmemmap_size);
-
-#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
+extern struct page *vmemmap;
+extern unsigned long vmemmap_size;
 
-extern unsigned long __bootdata_preserved(MODULES_VADDR);
-extern unsigned long __bootdata_preserved(MODULES_END);
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
 #define MODULES_VADDR	MODULES_VADDR
 #define MODULES_END	MODULES_END
 #define MODULES_LEN	(1UL << 31)
@@ -109,6 +107,26 @@ static inline int is_module_addr(void *addr)
 	return 1;
 }
 
+#ifdef CONFIG_KMSAN
+#define KMSAN_VMALLOC_SIZE (VMALLOC_END - VMALLOC_START)
+#define KMSAN_VMALLOC_SHADOW_START VMALLOC_END
+#define KMSAN_VMALLOC_SHADOW_END (KMSAN_VMALLOC_SHADOW_START + KMSAN_VMALLOC_SIZE)
+#define KMSAN_VMALLOC_ORIGIN_START KMSAN_VMALLOC_SHADOW_END
+#define KMSAN_VMALLOC_ORIGIN_END (KMSAN_VMALLOC_ORIGIN_START + KMSAN_VMALLOC_SIZE)
+#define KMSAN_MODULES_SHADOW_START KMSAN_VMALLOC_ORIGIN_END
+#define KMSAN_MODULES_SHADOW_END (KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#define KMSAN_MODULES_ORIGIN_START KMSAN_MODULES_SHADOW_END
+#define KMSAN_MODULES_ORIGIN_END (KMSAN_MODULES_ORIGIN_START + MODULES_LEN)
+#endif
+
+#ifdef CONFIG_RANDOMIZE_BASE
+#define KASLR_LEN	(1UL << 31)
+#else
+#define KASLR_LEN	0UL
+#endif
+
+void setup_protection_map(void);
+
 /*
  * A 64 bit pagetable entry of S390 has following format:
  * |			 PFRA			      |0IPC|  OS  |
@@ -181,6 +199,8 @@ static inline int is_module_addr(void *addr)
 #define _PAGE_SOFT_DIRTY 0x000
 #endif
 
+#define _PAGE_SW_BITS	0xffUL		/* All SW bits */
+
 #define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE	/* SW pte exclusive swap bit */
 
 /* Set of bits not changed in pte_modify */
@@ -188,6 +208,12 @@ static inline int is_module_addr(void *addr)
 				 _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
 
 /*
+ * Mask of bits that must not be changed with RDP. Only the _PAGE_PROTECT
+ * HW bit and the SW bits are allowed to change.
+ */
+#define _PAGE_RDP_MASK		~(_PAGE_PROTECT | _PAGE_SW_BITS)
+
+/*
  * handle_pte_fault uses pte_present and pte_none to find out the pte type
  * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
  * distinguish present from not-present ptes. It is changed only with the page
@@ -253,28 +279,40 @@ static inline int is_module_addr(void *addr)
 #define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
 #define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
 #define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
-#define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \
+				 _REGION3_ENTRY_PRESENT)
 #define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
 
+#define _REGION3_ENTRY_HARDWARE_BITS		0xfffffffffffff6ffUL
+#define _REGION3_ENTRY_HARDWARE_BITS_LARGE	0xffffffff8001073cUL
 #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address	     */
 #define _REGION3_ENTRY_DIRTY	0x2000	/* SW region dirty bit */
 #define _REGION3_ENTRY_YOUNG	0x1000	/* SW region young bit */
+#define _REGION3_ENTRY_COMM	0x0010	/* Common-Region, marks swap entry */
 #define _REGION3_ENTRY_LARGE	0x0400	/* RTTE-format control, large page  */
-#define _REGION3_ENTRY_READ	0x0002	/* SW region read bit */
-#define _REGION3_ENTRY_WRITE	0x0001	/* SW region write bit */
+#define _REGION3_ENTRY_WRITE	0x8000	/* SW region write bit */
+#define _REGION3_ENTRY_READ	0x4000	/* SW region read bit */
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
+#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */
 #else
 #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
 #endif
 
 #define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL
 
+/*
+ * SW region present bit. For non-leaf region-third-table entries, bits 62-63
+ * indicate the TABLE LENGTH and both must be set to 1. But such entries
+ * would always be considered as present, so it is safe to use bit 63 as
+ * PRESENT bit for PUD.
+ */
+#define _REGION3_ENTRY_PRESENT	0x0001
+
 /* Bits in the segment table entry */
-#define _SEGMENT_ENTRY_BITS			0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe30UL
-#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff00730UL
+#define _SEGMENT_ENTRY_BITS			0xfffffffffffffe3fUL
+#define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe3cUL
+#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff1073cUL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
 #define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* page table origin		    */
 #define _SEGMENT_ENTRY_PROTECT	0x200	/* segment protection bit	    */
@@ -282,21 +320,29 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
 #define _SEGMENT_ENTRY_TYPE_MASK 0x0c	/* segment table type mask	    */
 
-#define _SEGMENT_ENTRY		(0)
+#define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PRESENT)
 #define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID)
 
 #define _SEGMENT_ENTRY_DIRTY	0x2000	/* SW segment dirty bit */
 #define _SEGMENT_ENTRY_YOUNG	0x1000	/* SW segment young bit */
+
+#define _SEGMENT_ENTRY_COMM	0x0010	/* Common-Segment, marks swap entry */
 #define _SEGMENT_ENTRY_LARGE	0x0400	/* STE-format control, large page */
-#define _SEGMENT_ENTRY_WRITE	0x0002	/* SW segment write bit */
-#define _SEGMENT_ENTRY_READ	0x0001	/* SW segment read bit */
+#define _SEGMENT_ENTRY_WRITE	0x8000	/* SW segment write bit */
+#define _SEGMENT_ENTRY_READ	0x4000	/* SW segment read bit */
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */
 #else
 #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
 #endif
 
+#define _SEGMENT_ENTRY_PRESENT	0x0001	/* SW segment present bit */
+
+/* Common bits in region and segment table entries, for swap entries */
+#define _RST_ENTRY_COMM		0x0010	/* Common-Region/Segment, marks swap entry */
+#define _RST_ENTRY_INVALID	0x0020	/* invalid region/segment table entry */
+
 #define _CRST_ENTRIES	2048	/* number of region/segment table entries */
 #define _PAGE_ENTRIES	256	/* number of page table entries	*/
 
@@ -312,7 +358,7 @@ static inline int is_module_addr(void *addr)
 #define _REGION2_INDEX	(0x7ffUL << _REGION2_SHIFT)
 #define _REGION3_INDEX	(0x7ffUL << _REGION3_SHIFT)
 #define _SEGMENT_INDEX	(0x7ffUL << _SEGMENT_SHIFT)
-#define _PAGE_INDEX	(0xffUL  << _PAGE_SHIFT)
+#define _PAGE_INDEX	(0xffUL  << PAGE_SHIFT)
 
 #define _REGION1_SIZE	(1UL << _REGION1_SHIFT)
 #define _REGION2_SIZE	(1UL << _REGION2_SHIFT)
@@ -375,9 +421,10 @@ static inline int is_module_addr(void *addr)
 #define PGSTE_HC_BIT	0x0020000000000000UL
 #define PGSTE_GR_BIT	0x0004000000000000UL
 #define PGSTE_GC_BIT	0x0002000000000000UL
-#define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
-#define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
-#define PGSTE_VSIE_BIT	0x0000200000000000UL	/* ref'd in a shadow table */
+#define PGSTE_ST2_MASK	0x0000ffff00000000UL
+#define PGSTE_UC_BIT	0x0000000000008000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x0000000000004000UL	/* IPTE notify bit */
+#define PGSTE_VSIE_BIT	0x0000000000002000UL	/* ref'd in a shadow table */
 
 /* Guest Page State used for virtualization */
 #define _PGSTE_GPS_ZERO			0x0000000080000000UL
@@ -399,101 +446,107 @@ static inline int is_module_addr(void *addr)
 /*
  * Page protection definitions.
  */
-#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RO		__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define __PAGE_NONE		(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
+#define __PAGE_RO		(_PAGE_PRESENT | _PAGE_READ | \
 				 _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RX		__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+#define __PAGE_RX		(_PAGE_PRESENT | _PAGE_READ | \
 				 _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RW		__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_RW		(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 				 _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
-#define PAGE_RWX	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_RWX		(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 				 _PAGE_INVALID | _PAGE_PROTECT)
-
-#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_SHARED		(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 				 _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
-#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define __PAGE_KERNEL		(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 				 _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+#define __PAGE_KERNEL_RO	(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
 				 _PAGE_PROTECT | _PAGE_NOEXEC)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
-				  _PAGE_YOUNG |	_PAGE_DIRTY)
 
-/*
- * On s390 the page table entry has an invalid bit and a read-only bit.
- * Read permission implies execute permission and write permission
- * implies read permission.
- */
-         /*xwr*/
-#define __P000	PAGE_NONE
-#define __P001	PAGE_RO
-#define __P010	PAGE_RO
-#define __P011	PAGE_RO
-#define __P100	PAGE_RX
-#define __P101	PAGE_RX
-#define __P110	PAGE_RX
-#define __P111	PAGE_RX
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_RO
-#define __S010	PAGE_RW
-#define __S011	PAGE_RW
-#define __S100	PAGE_RX
-#define __S101	PAGE_RX
-#define __S110	PAGE_RWX
-#define __S111	PAGE_RWX
+extern unsigned long page_noexec_mask;
+
+#define __pgprot_page_mask(x)	__pgprot((x) & page_noexec_mask)
+
+#define PAGE_NONE		__pgprot_page_mask(__PAGE_NONE)
+#define PAGE_RO			__pgprot_page_mask(__PAGE_RO)
+#define PAGE_RX			__pgprot_page_mask(__PAGE_RX)
+#define PAGE_RW			__pgprot_page_mask(__PAGE_RW)
+#define PAGE_RWX		__pgprot_page_mask(__PAGE_RWX)
+#define PAGE_SHARED		__pgprot_page_mask(__PAGE_SHARED)
+#define PAGE_KERNEL		__pgprot_page_mask(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO		__pgprot_page_mask(__PAGE_KERNEL_RO)
 
 /*
  * Segment entry (large page) protection definitions.
  */
-#define SEGMENT_NONE	__pgprot(_SEGMENT_ENTRY_INVALID | \
+#define __SEGMENT_NONE		(_SEGMENT_ENTRY_PRESENT | \
+				 _SEGMENT_ENTRY_INVALID | \
 				 _SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_RO	__pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define __SEGMENT_RO		(_SEGMENT_ENTRY_PRESENT | \
+				 _SEGMENT_ENTRY_PROTECT | \
 				 _SEGMENT_ENTRY_READ | \
 				 _SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RX	__pgprot(_SEGMENT_ENTRY_PROTECT | \
+#define __SEGMENT_RX		(_SEGMENT_ENTRY_PRESENT | \
+				 _SEGMENT_ENTRY_PROTECT | \
 				 _SEGMENT_ENTRY_READ)
-#define SEGMENT_RW	__pgprot(_SEGMENT_ENTRY_READ | \
+#define __SEGMENT_RW		(_SEGMENT_ENTRY_PRESENT | \
+				 _SEGMENT_ENTRY_READ | \
 				 _SEGMENT_ENTRY_WRITE | \
 				 _SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_RWX	__pgprot(_SEGMENT_ENTRY_READ | \
+#define __SEGMENT_RWX		(_SEGMENT_ENTRY_PRESENT | \
+				 _SEGMENT_ENTRY_READ | \
 				 _SEGMENT_ENTRY_WRITE)
-#define SEGMENT_KERNEL	__pgprot(_SEGMENT_ENTRY |	\
+#define __SEGMENT_KERNEL	(_SEGMENT_ENTRY |	\
 				 _SEGMENT_ENTRY_LARGE |	\
 				 _SEGMENT_ENTRY_READ |	\
 				 _SEGMENT_ENTRY_WRITE | \
 				 _SEGMENT_ENTRY_YOUNG | \
 				 _SEGMENT_ENTRY_DIRTY | \
 				 _SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY |	\
+#define __SEGMENT_KERNEL_RO	(_SEGMENT_ENTRY |	\
 				 _SEGMENT_ENTRY_LARGE |	\
 				 _SEGMENT_ENTRY_READ |	\
 				 _SEGMENT_ENTRY_YOUNG |	\
 				 _SEGMENT_ENTRY_PROTECT | \
 				 _SEGMENT_ENTRY_NOEXEC)
-#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY |	\
-				 _SEGMENT_ENTRY_LARGE |	\
-				 _SEGMENT_ENTRY_READ |	\
-				 _SEGMENT_ENTRY_WRITE | \
-				 _SEGMENT_ENTRY_YOUNG |	\
-				 _SEGMENT_ENTRY_DIRTY)
+
+extern unsigned long segment_noexec_mask;
+
+#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask)
+
+#define SEGMENT_NONE		__pgprot_segment_mask(__SEGMENT_NONE)
+#define SEGMENT_RO		__pgprot_segment_mask(__SEGMENT_RO)
+#define SEGMENT_RX		__pgprot_segment_mask(__SEGMENT_RX)
+#define SEGMENT_RW		__pgprot_segment_mask(__SEGMENT_RW)
+#define SEGMENT_RWX		__pgprot_segment_mask(__SEGMENT_RWX)
+#define SEGMENT_KERNEL		__pgprot_segment_mask(__SEGMENT_KERNEL)
+#define SEGMENT_KERNEL_RO	__pgprot_segment_mask(__SEGMENT_KERNEL_RO)
 
 /*
  * Region3 entry (large page) protection definitions.
  */
 
-#define REGION3_KERNEL	__pgprot(_REGION_ENTRY_TYPE_R3 | \
-				 _REGION3_ENTRY_LARGE |	 \
-				 _REGION3_ENTRY_READ |	 \
-				 _REGION3_ENTRY_WRITE |	 \
-				 _REGION3_ENTRY_YOUNG |	 \
+#define __REGION3_KERNEL	(_REGION_ENTRY_TYPE_R3 | \
+				 _REGION3_ENTRY_PRESENT | \
+				 _REGION3_ENTRY_LARGE | \
+				 _REGION3_ENTRY_READ | \
+				 _REGION3_ENTRY_WRITE | \
+				 _REGION3_ENTRY_YOUNG | \
 				 _REGION3_ENTRY_DIRTY | \
 				 _REGION_ENTRY_NOEXEC)
-#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
-				   _REGION3_ENTRY_LARGE |  \
-				   _REGION3_ENTRY_READ |   \
-				   _REGION3_ENTRY_YOUNG |  \
-				   _REGION_ENTRY_PROTECT | \
-				   _REGION_ENTRY_NOEXEC)
+#define __REGION3_KERNEL_RO	(_REGION_ENTRY_TYPE_R3 | \
+				 _REGION3_ENTRY_PRESENT | \
+				 _REGION3_ENTRY_LARGE | \
+				 _REGION3_ENTRY_READ | \
+				 _REGION3_ENTRY_YOUNG | \
+				 _REGION_ENTRY_PROTECT | \
+				 _REGION_ENTRY_NOEXEC)
+
+extern unsigned long region_noexec_mask;
+
+#define __pgprot_region_mask(x)	__pgprot((x) & region_noexec_mask)
+
+#define REGION3_KERNEL		__pgprot_region_mask(__REGION3_KERNEL)
+#define REGION3_KERNEL_RO	__pgprot_region_mask(__REGION3_KERNEL_RO)
 
 static inline bool mm_p4d_folded(struct mm_struct *mm)
 {
@@ -525,19 +578,20 @@ static inline int mm_has_pgste(struct mm_struct *mm)
 static inline int mm_is_protected(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
-	if (unlikely(atomic_read(&mm->context.is_protected)))
+	if (unlikely(atomic_read(&mm->context.protected_count)))
 		return 1;
 #endif
 	return 0;
 }
 
-static inline int mm_alloc_pgste(struct mm_struct *mm)
+static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask)
 {
-#ifdef CONFIG_PGSTE
-	if (unlikely(mm->context.alloc_pgste))
-		return 1;
-#endif
-	return 0;
+	return __pgste(pgste_val(pgste) & ~mask);
+}
+
+static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask)
+{
+	return __pgste(pgste_val(pgste) | mask);
 }
 
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
@@ -571,10 +625,20 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
 }
 
 /*
- * In the case that a guest uses storage keys
- * faults should no longer be backed by zero pages
+ * As soon as the guest uses storage keys or enables PV, we deduplicate all
+ * mapped shared zeropages and prevent new shared zeropages from getting
+ * mapped.
  */
-#define mm_forbids_zeropage mm_has_pgste
+#define mm_forbids_zeropage mm_forbids_zeropage
+static inline int mm_forbids_zeropage(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (!mm->context.allow_cow_sharing)
+		return 1;
+#endif
+	return 0;
+}
+
 static inline int mm_uses_skeys(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
@@ -596,7 +660,15 @@ static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
 		: "cc");
 }
 
-static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
+/**
+ * cspg() - Compare and Swap and Purge (CSPG)
+ * @ptr: Pointer to the value to be exchanged
+ * @old: The expected old value
+ * @new: The new value
+ *
+ * Return: True if compare and swap was successful, otherwise false.
+ */
+static inline bool cspg(unsigned long *ptr, unsigned long old, unsigned long new)
 {
 	union register_pair r1 = { .even = old, .odd = new, };
 	unsigned long address = (unsigned long)ptr | 1;
@@ -606,6 +678,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
 		: [r1] "+&d" (r1.pair), "+m" (*ptr)
 		: [address] "d" (address)
 		: "cc");
+	return old == r1.even;
 }
 
 #define CRDTE_DTT_PAGE		0x00UL
@@ -614,7 +687,18 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
 #define CRDTE_DTT_REGION2	0x18UL
 #define CRDTE_DTT_REGION1	0x1cUL
 
-static inline void crdte(unsigned long old, unsigned long new,
+/**
+ * crdte() - Compare and Replace DAT Table Entry
+ * @old:     The expected old value
+ * @new:     The new value
+ * @table:   Pointer to the value to be exchanged
+ * @dtt:     Table type of the table to be exchanged
+ * @address: The address mapped by the entry to be replaced
+ * @asce:    The ASCE of this entry
+ *
+ * Return: True if compare and replace was successful, otherwise false.
+ */
+static inline bool crdte(unsigned long old, unsigned long new,
 			 unsigned long *table, unsigned long dtt,
 			 unsigned long address, unsigned long asce)
 {
@@ -625,6 +709,7 @@ static inline void crdte(unsigned long old, unsigned long new,
 		     : [r1] "+&d" (r1.pair)
 		     : [r2] "d" (r2.pair), [asce] "a" (asce)
 		     : "memory", "cc");
+	return old == r1.even;
 }
 
 /*
@@ -700,7 +785,7 @@ static inline int pud_present(pud_t pud)
 {
 	if (pud_folded(pud))
 		return 1;
-	return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+	return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0;
 }
 
 static inline int pud_none(pud_t pud)
@@ -710,23 +795,28 @@ static inline int pud_none(pud_t pud)
 	return pud_val(pud) == _REGION3_ENTRY_EMPTY;
 }
 
-#define pud_leaf	pud_large
-static inline int pud_large(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
 		return 0;
-	return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
+	return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0);
 }
 
-#define pmd_leaf	pmd_large
-static inline int pmd_large(pmd_t pmd)
+static inline int pmd_present(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0;
+}
+
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
-	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
+	return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0);
 }
 
 static inline int pmd_bad(pmd_t pmd)
 {
-	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
+	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_leaf(pmd))
 		return 1;
 	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 }
@@ -735,7 +825,7 @@ static inline int pud_bad(pud_t pud)
 {
 	unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
 
-	if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
+	if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud))
 		return 1;
 	if (type < _REGION_ENTRY_TYPE_R3)
 		return 0;
@@ -753,11 +843,6 @@ static inline int p4d_bad(p4d_t p4d)
 	return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
 }
 
-static inline int pmd_present(pmd_t pmd)
-{
-	return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
-}
-
 static inline int pmd_none(pmd_t pmd)
 {
 	return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
@@ -775,11 +860,13 @@ static inline int pud_write(pud_t pud)
 	return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
 }
 
+#define pmd_dirty pmd_dirty
 static inline int pmd_dirty(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
 }
 
+#define pmd_young pmd_young
 static inline int pmd_young(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
@@ -823,13 +910,12 @@ static inline int pte_protnone(pte_t pte)
 
 static inline int pmd_protnone(pmd_t pmd)
 {
-	/* pmd_large(pmd) implies pmd_present(pmd) */
-	return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+	/* pmd_leaf(pmd) implies pmd_present(pmd) */
+	return pmd_leaf(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
 }
 #endif
 
-#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
 {
 	return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
 }
@@ -908,6 +994,7 @@ static inline int pte_unused(pte_t pte)
  * young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
  * must not be set.
  */
+#define pte_pgprot pte_pgprot
 static inline pgprot_t pte_pgprot(pte_t pte)
 {
 	unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
@@ -1007,7 +1094,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 	return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
 }
 
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkwrite_novma(pte_t pte)
 {
 	pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE));
 	if (pte_val(pte) & _PAGE_DIRTY)
@@ -1061,6 +1148,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
 #define IPTE_NODAT	0x400
 #define IPTE_GUEST_ASCE	0x800
 
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
+				       unsigned long opt, unsigned long asce,
+				       int local)
+{
+	unsigned long pto;
+
+	pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+	asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+		     : "+m" (*ptep)
+		     : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
+		       [asce] "a" (asce), [m4] "i" (local));
+}
+
 static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
 					unsigned long opt, unsigned long asce,
 					int local)
@@ -1140,7 +1240,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(mm) && pte_present(res))
-		uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+		uv_convert_from_secure_pte(res);
 	return res;
 }
 
@@ -1158,7 +1258,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 	res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(vma->vm_mm) && pte_present(res))
-		uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+		uv_convert_from_secure_pte(res);
 	return res;
 }
 
@@ -1182,9 +1282,22 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	} else {
 		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	}
-	/* At this point the reference through the mapping is still present */
-	if (mm_is_protected(mm) && pte_present(res))
-		uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+	/* Nothing to do */
+	if (!mm_is_protected(mm) || !pte_present(res))
+		return res;
+	/*
+	 * At this point the reference through the mapping is still present.
+	 * The notifier should have destroyed all protected vCPUs at this
+	 * point, so the destroy should be successful.
+	 */
+	if (full && !uv_destroy_pte(res))
+		return res;
+	/*
+	 * If something went wrong and the page could not be destroyed, or
+	 * if this is not a mm teardown, the slower export is used as
+	 * fallback instead.
+	 */
+	uv_convert_from_secure_pte(res);
 	return res;
 }
 
@@ -1198,6 +1311,44 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 		ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
 }
 
+/*
+ * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
+ * bits in the comparison. Those might change e.g. because of dirty and young
+ * tracking.
+ */
+static inline int pte_allow_rdp(pte_t old, pte_t new)
+{
+	/*
+	 * Only allow changes from RO to RW
+	 */
+	if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
+		return 0;
+
+	return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
+}
+
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+						unsigned long address,
+						pte_t *ptep)
+{
+	/*
+	 * RDP might not have propagated the PTE protection reset to all CPUs,
+	 * so there could be spurious TLB protection faults.
+	 * NOTE: This will also be called when a racing pagetable update on
+	 * another thread already installed the correct PTE. Both cases cannot
+	 * really be distinguished.
+	 * Therefore, only do the local TLB flush when RDP can be used, and the
+	 * PTE does not have _PAGE_PROTECT set, to avoid unnecessary overhead.
+	 * A local RDP can be used to do the flush.
+	 */
+	if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT))
+		__ptep_rdp(address, ptep, 0, 0, 1);
+}
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+			 pte_t new);
+
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 					unsigned long addr, pte_t *ptep,
@@ -1205,7 +1356,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 {
 	if (pte_same(*ptep, entry))
 		return 0;
-	ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+	if (cpu_has_rdp() && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+		ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
+	else
+		ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
 	return 1;
 }
 
@@ -1250,24 +1404,37 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
 #define pgprot_writecombine	pgprot_writecombine
 pgprot_t pgprot_writecombine(pgprot_t prot);
 
-#define pgprot_writethrough	pgprot_writethrough
-pgprot_t pgprot_writethrough(pgprot_t prot);
+#define PFN_PTE_SHIFT		PAGE_SHIFT
 
 /*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified.  Thus, the following
- * hook is made available.
+ * Set multiple PTEs to consecutive pages with a single call.  All PTEs
+ * are within the same folio, PMD and VMA.
  */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t entry)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry, unsigned int nr)
 {
 	if (pte_present(entry))
 		entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
-	if (mm_has_pgste(mm))
-		ptep_set_pte_at(mm, addr, ptep, entry);
-	else
-		set_pte(ptep, entry);
+	if (mm_has_pgste(mm)) {
+		for (;;) {
+			ptep_set_pte_at(mm, addr, ptep, entry);
+			if (--nr == 0)
+				break;
+			ptep++;
+			entry = __pte(pte_val(entry) + PAGE_SIZE);
+			addr += PAGE_SIZE;
+		}
+	} else {
+		for (;;) {
+			set_pte(ptep, entry);
+			if (--nr == 0)
+				break;
+			ptep++;
+			entry = __pte(pte_val(entry) + PAGE_SIZE);
+		}
+	}
 }
+#define set_ptes set_ptes
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
@@ -1278,21 +1445,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 	pte_t __pte;
 
 	__pte = __pte(physpage | pgprot_val(pgprot));
-	if (!MACHINE_HAS_NX)
-		__pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC));
 	return pte_mkyoung(__pte);
 }
 
-static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
-{
-	unsigned long physpage = page_to_phys(page);
-	pte_t __pte = mk_pte_phys(physpage, pgprot);
-
-	if (pte_write(__pte) && PageDirty(page))
-		__pte = pte_mkdirty(__pte);
-	return __pte;
-}
-
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
 #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
@@ -1306,7 +1461,7 @@ static inline unsigned long pmd_deref(pmd_t pmd)
 	unsigned long origin_mask;
 
 	origin_mask = _SEGMENT_ENTRY_ORIGIN;
-	if (pmd_large(pmd))
+	if (pmd_leaf(pmd))
 		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
 	return (unsigned long)__va(pmd_val(pmd) & origin_mask);
 }
@@ -1321,11 +1476,12 @@ static inline unsigned long pud_deref(pud_t pud)
 	unsigned long origin_mask;
 
 	origin_mask = _REGION_ENTRY_ORIGIN;
-	if (pud_large(pud))
+	if (pud_leaf(pud))
 		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
 	return (unsigned long)__va(pud_val(pud) & origin_mask);
 }
 
+#define pud_pfn pud_pfn
 static inline unsigned long pud_pfn(pud_t pud)
 {
 	return __pa(pud_deref(pud)) >> PAGE_SHIFT;
@@ -1423,7 +1579,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
 	return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
 }
 
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
+static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
 {
 	pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
 	if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
@@ -1646,8 +1802,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t entry)
 {
-	if (!MACHINE_HAS_NX)
-		entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
 	set_pmd(pmdp, entry);
 }
 
@@ -1689,8 +1843,10 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
 				   unsigned long addr, pmd_t *pmdp)
 {
-	pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+	pmd_t pmd;
 
+	VM_WARN_ON_ONCE(!pmd_present(*pmdp));
+	pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
 	return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
 }
 
@@ -1713,17 +1869,16 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 #define pmdp_collapse_flush pmdp_collapse_flush
 
 #define pfn_pmd(pfn, pgprot)	mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
-#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
 
 static inline int pmd_trans_huge(pmd_t pmd)
 {
-	return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
+	return pmd_leaf(pmd);
 }
 
 #define has_transparent_hugepage has_transparent_hugepage
 static inline int has_transparent_hugepage(void)
 {
-	return MACHINE_HAS_EDAT1 ? 1 : 0;
+	return cpu_has_edat1() ? 1 : 0;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -1777,10 +1932,59 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#define kern_addr_valid(addr)   (1)
+/*
+ * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE)
+ * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste
+ * as invalid.
+ * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010
+ * |			  offset			|Xtype |11TT|S0|
+ * |0000000000111111111122222222223333333333444444444455|555555|5566|66|
+ * |0123456789012345678901234567890123456789012345678901|234567|8901|23|
+ *
+ * Bits 0-51 store the offset.
+ * Bits 53-57 store the type.
+ * Bit 62 (S) is used for softdirty tracking.
+ * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT.
+ * Bit 52 (X) is unused.
+ */
+
+#define __SWP_OFFSET_MASK_RSTE	((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT_RSTE	12
+#define __SWP_TYPE_MASK_RSTE		((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT_RSTE	6
+
+/*
+ * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3
+ * bits 0x01. See also __set_huge_pte_at().
+ */
+static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset)
+{
+	unsigned long rste;
+
+	rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM;
+	rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE;
+	rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE;
+	return rste;
+}
+
+static inline unsigned long __swp_type_rste(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE;
+}
+
+static inline unsigned long __swp_offset_rste(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE;
+}
+
+#define __rste_to_swp_entry(rste)	((swp_entry_t) { rste })
 
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
+extern void vmem_unmap_4k_page(unsigned long addr);
+extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
 extern int s390_enable_sie(void);
 extern int s390_enable_skey(void);
 extern void s390_reset_cmma(struct mm_struct *mm);
@@ -1792,4 +1996,18 @@ extern void s390_reset_cmma(struct mm_struct *mm);
 #define pmd_pgtable(pmd) \
 	((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
 
+static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
+{
+	unsigned long *pgstes, res;
+
+	pgstes = pgt + _PAGE_ENTRIES;
+
+	res = (pgstes[0] & PGSTE_ST2_MASK) << 16;
+	res |= pgstes[1] & PGSTE_ST2_MASK;
+	res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16;
+	res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32;
+
+	return res;
+}
+
 #endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
new file mode 100644
index 000000000000..7ef3bbec98b0
--- /dev/null
+++ b/arch/s390/include/asm/physmem_info.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MEM_DETECT_H
+#define _ASM_S390_MEM_DETECT_H
+
+#include <linux/types.h>
+#include <asm/page.h>
+
+enum physmem_info_source {
+	MEM_DETECT_NONE = 0,
+	MEM_DETECT_SCLP_STOR_INFO,
+	MEM_DETECT_DIAG260,
+	MEM_DETECT_DIAG500_STOR_LIMIT,
+	MEM_DETECT_SCLP_READ_INFO,
+	MEM_DETECT_BIN_SEARCH
+};
+
+struct physmem_range {
+	u64 start;
+	u64 end;
+};
+
+enum reserved_range_type {
+	RR_DECOMPRESSOR,
+	RR_INITRD,
+	RR_VMLINUX,
+	RR_AMODE31,
+	RR_IPLREPORT,
+	RR_CERT_COMP_LIST,
+	RR_MEM_DETECT_EXT,
+	RR_VMEM,
+	RR_MAX
+};
+
+struct reserved_range {
+	unsigned long start;
+	unsigned long end;
+	struct reserved_range *chain;
+};
+
+/*
+ * Storage element id is defined as 1 byte (up to 256 storage elements).
+ * In practice only storage element ids 0 and 1 are used.
+ * According to the architecture one storage element could have as many as
+ * 1020 subincrements. 255 physmem_ranges are embedded in physmem_info.
+ * If more physmem_ranges are required, a block of memory from already
+ * known physmem_range is taken (online_extended points to it).
+ */
+#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */
+
+struct physmem_info {
+	u32 range_count;
+	u8 info_source;
+	unsigned long usable;
+	struct reserved_range reserved[RR_MAX];
+	struct physmem_range online[MEM_INLINED_ENTRIES];
+	struct physmem_range *online_extended;
+};
+
+extern struct physmem_info physmem_info;
+
+void add_physmem_online_range(u64 start, u64 end);
+
+static inline int __get_physmem_range(u32 n, unsigned long *start,
+				      unsigned long *end, bool respect_usable_limit)
+{
+	if (n >= physmem_info.range_count) {
+		*start = 0;
+		*end = 0;
+		return -1;
+	}
+
+	if (n < MEM_INLINED_ENTRIES) {
+		*start = (unsigned long)physmem_info.online[n].start;
+		*end = (unsigned long)physmem_info.online[n].end;
+	} else {
+		*start = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].start;
+		*end = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].end;
+	}
+
+	if (respect_usable_limit && physmem_info.usable) {
+		if (*start >= physmem_info.usable)
+			return -1;
+		if (*end > physmem_info.usable)
+			*end = physmem_info.usable;
+	}
+	return 0;
+}
+
+/**
+ * for_each_physmem_usable_range - early online memory range iterator
+ * @i: an integer used as loop variable
+ * @p_start: ptr to unsigned long for start address of the range
+ * @p_end: ptr to unsigned long for end address of the range
+ *
+ * Walks over detected online memory ranges below usable limit.
+ */
+#define for_each_physmem_usable_range(i, p_start, p_end)		\
+	for (i = 0; !__get_physmem_range(i, p_start, p_end, true); i++)
+
+/* Walks over all detected online memory ranges disregarding usable limit. */
+#define for_each_physmem_online_range(i, p_start, p_end)		\
+	for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++)
+
+static inline const char *get_physmem_info_source(void)
+{
+	switch (physmem_info.info_source) {
+	case MEM_DETECT_SCLP_STOR_INFO:
+		return "sclp storage info";
+	case MEM_DETECT_DIAG260:
+		return "diag260";
+	case MEM_DETECT_DIAG500_STOR_LIMIT:
+		return "diag500 storage limit";
+	case MEM_DETECT_SCLP_READ_INFO:
+		return "sclp read info";
+	case MEM_DETECT_BIN_SEARCH:
+		return "binary search";
+	}
+	return "none";
+}
+
+#define RR_TYPE_NAME(t) case RR_ ## t: return #t
+static inline const char *get_rr_type_name(enum reserved_range_type t)
+{
+	switch (t) {
+	RR_TYPE_NAME(DECOMPRESSOR);
+	RR_TYPE_NAME(INITRD);
+	RR_TYPE_NAME(VMLINUX);
+	RR_TYPE_NAME(AMODE31);
+	RR_TYPE_NAME(IPLREPORT);
+	RR_TYPE_NAME(CERT_COMP_LIST);
+	RR_TYPE_NAME(MEM_DETECT_EXT);
+	RR_TYPE_NAME(VMEM);
+	default:
+		return "UNKNOWN";
+	}
+}
+
+#define for_each_physmem_reserved_type_range(t, range, p_start, p_end)				\
+	for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end;	\
+	     range && range->end; range = range->chain ? __va(range->chain) : NULL,		\
+	     *p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
+
+static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
+							     struct reserved_range *range)
+{
+	if (!range) {
+		range = &physmem_info.reserved[*t];
+		if (range->end)
+			return range;
+	}
+	if (range->chain)
+		return __va(range->chain);
+	while (++*t < RR_MAX) {
+		range = &physmem_info.reserved[*t];
+		if (range->end)
+			return range;
+	}
+	return NULL;
+}
+
+#define for_each_physmem_reserved_range(t, range, p_start, p_end)			\
+	for (t = 0, range = __physmem_reserved_next(&t, NULL),			\
+	    *p_start = range ? range->start : 0, *p_end = range ? range->end : 0;	\
+	     range; range = __physmem_reserved_next(&t, range),			\
+	    *p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
+
+static inline unsigned long get_physmem_reserved(enum reserved_range_type type,
+						 unsigned long *addr, unsigned long *size)
+{
+	*addr = physmem_info.reserved[type].start;
+	*size = physmem_info.reserved[type].end - physmem_info.reserved[type].start;
+	return *size;
+}
+
+#define AMODE31_START	(physmem_info.reserved[RR_AMODE31].start)
+#define AMODE31_END	(physmem_info.reserved[RR_AMODE31].end)
+
+#endif
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index dd3d20c332ac..b7b59faf16f4 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -2,7 +2,7 @@
 /*
  * Kernelspace interface to the pkey device driver
  *
- * Copyright IBM Corp. 2016,2019
+ * Copyright IBM Corp. 2016, 2023
  *
  * Author: Harald Freudenberger <freude@de.ibm.com>
  *
@@ -20,9 +20,22 @@
  * @param key pointer to a buffer containing the key blob
  * @param keylen size of the key blob in bytes
  * @param protkey pointer to buffer receiving the protected key
+ * @param xflags additional execution flags (see PKEY_XFLAG_* definitions below)
+ *	  As of now the only supported flag is PKEY_XFLAG_NOMEMALLOC.
  * @return 0 on success, negative errno value on failure
  */
-int pkey_keyblob2pkey(const u8 *key, u32 keylen,
-		      struct pkey_protkey *protkey);
+int pkey_key2protkey(const u8 *key, u32 keylen,
+		     u8 *protkey, u32 *protkeylen, u32 *protkeytype,
+		     u32 xflags);
+
+/*
+ * If this flag is given in the xflags parameter, the pkey implementation
+ * is not allowed to allocate memory but instead should fall back to use
+ * preallocated memory or simply fail with -ENOMEM.
+ * This flag is for protected key derivation within a cipher or similar
+ * which must not allocate memory, as that could cause I/O operations - see
+ * also the CRYPTO_ALG_ALLOCATES_MEMORY flag in crypto.h.
+ */
+#define PKEY_XFLAG_NOMEMALLOC 0x0001
 
 #endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index bf15da0fedbc..6ccd033acfe5 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -5,46 +5,62 @@
 #include <asm/current.h>
 #include <linux/thread_info.h>
 #include <asm/atomic_ops.h>
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#include <asm/cmpxchg.h>
+#include <asm/march.h>
 
 /* We use the MSB mostly because its available */
 #define PREEMPT_NEED_RESCHED	0x80000000
+
+/*
+ * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
+ * that a decrement hitting 0 means we can and should reschedule.
+ */
 #define PREEMPT_ENABLED	(0 + PREEMPT_NEED_RESCHED)
 
-static inline int preempt_count(void)
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
 {
-	return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
+	return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED;
 }
 
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
 {
 	int old, new;
 
+	old = READ_ONCE(get_lowcore()->preempt_count);
 	do {
-		old = READ_ONCE(S390_lowcore.preempt_count);
-		new = (old & PREEMPT_NEED_RESCHED) |
-			(pc & ~PREEMPT_NEED_RESCHED);
-	} while (__atomic_cmpxchg(&S390_lowcore.preempt_count,
-				  old, new) != old);
+		new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED);
+	} while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new));
 }
 
-static inline void set_preempt_need_resched(void)
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * short instruction sequence.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
 {
-	__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+	__atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
 }
 
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
 {
-	__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+	__atomic_or(PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
 }
 
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
 {
-	return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
+	return !(READ_ONCE(get_lowcore()->preempt_count) & PREEMPT_NEED_RESCHED);
 }
 
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
 {
 	/*
 	 * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
@@ -52,88 +68,59 @@ static inline void __preempt_count_add(int val)
 	 */
 	if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
 		if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
-			__atomic_add_const(val, &S390_lowcore.preempt_count);
+			__atomic_add_const(val, &get_lowcore()->preempt_count);
 			return;
 		}
 	}
-	__atomic_add(val, &S390_lowcore.preempt_count);
+	__atomic_add(val, &get_lowcore()->preempt_count);
 }
 
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
 {
 	__preempt_count_add(-val);
 }
 
-static inline bool __preempt_count_dec_and_test(void)
+/*
+ * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule
+ * a decrement which hits zero means we have no preempt_count and should
+ * reschedule.
+ */
+static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
+	return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count);
 }
 
-static inline bool should_resched(int preempt_offset)
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(int preempt_offset)
 {
-	return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
-			preempt_offset);
+	return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset);
 }
 
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-#define PREEMPT_ENABLED	(0)
-
-static inline int preempt_count(void)
-{
-	return READ_ONCE(S390_lowcore.preempt_count);
-}
-
-static inline void preempt_count_set(int pc)
-{
-	S390_lowcore.preempt_count = pc;
-}
-
-static inline void set_preempt_need_resched(void)
-{
-}
-
-static inline void clear_preempt_need_resched(void)
-{
-}
+#define init_task_preempt_count(p)	do { } while (0)
+/* Deferred to CPU bringup time */
+#define init_idle_preempt_count(p, cpu)	do { } while (0)
 
-static inline bool test_preempt_need_resched(void)
-{
-	return false;
-}
+#ifdef CONFIG_PREEMPTION
 
-static inline void __preempt_count_add(int val)
-{
-	S390_lowcore.preempt_count += val;
-}
+void preempt_schedule(void);
+void preempt_schedule_notrace(void);
 
-static inline void __preempt_count_sub(int val)
-{
-	S390_lowcore.preempt_count -= val;
-}
+#ifdef CONFIG_PREEMPT_DYNAMIC
 
-static inline bool __preempt_count_dec_and_test(void)
-{
-	return !--S390_lowcore.preempt_count && tif_need_resched();
-}
+void dynamic_preempt_schedule(void);
+void dynamic_preempt_schedule_notrace(void);
+#define __preempt_schedule()		dynamic_preempt_schedule()
+#define __preempt_schedule_notrace()	dynamic_preempt_schedule_notrace()
 
-static inline bool should_resched(int preempt_offset)
-{
-	return unlikely(preempt_count() == preempt_offset &&
-			tif_need_resched());
-}
+#else /* CONFIG_PREEMPT_DYNAMIC */
 
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+#define __preempt_schedule()		preempt_schedule()
+#define __preempt_schedule_notrace()	preempt_schedule_notrace()
 
-#define init_task_preempt_count(p)	do { } while (0)
-/* Deferred to CPU bringup time */
-#define init_idle_preempt_count(p, cpu)	do { } while (0)
+#endif /* CONFIG_PREEMPT_DYNAMIC */
 
-#ifdef CONFIG_PREEMPTION
-extern void preempt_schedule(void);
-#define __preempt_schedule() preempt_schedule()
-extern void preempt_schedule_notrace(void);
-#define __preempt_schedule_notrace() preempt_schedule_notrace()
 #endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bd66f8e34949..6c8063cb8fe7 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -15,13 +15,11 @@
 #include <linux/bits.h>
 
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
-#define CIF_FPU			3	/* restore FPU registers */
 #define CIF_ENABLED_WAIT	5	/* in enabled wait state */
 #define CIF_MCCK_GUEST		6	/* machine check happening in guest */
 #define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */
 
 #define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
-#define _CIF_FPU		BIT(CIF_FPU)
 #define _CIF_ENABLED_WAIT	BIT(CIF_ENABLED_WAIT)
 #define _CIF_MCCK_GUEST		BIT(CIF_MCCK_GUEST)
 #define _CIF_DEDICATED_CPU	BIT(CIF_DEDICATED_CPU)
@@ -33,40 +31,68 @@
 #include <linux/cpumask.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
+#include <linux/bitops.h>
+#include <asm/fpu-types.h>
 #include <asm/cpu.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
-#include <asm/fpu/types.h>
-#include <asm/fpu/internal.h>
 #include <asm/irqflags.h>
+#include <asm/alternative.h>
+#include <asm/fault.h>
+
+struct pcpu {
+	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
+	unsigned long ec_clk;		/* sigp timestamp for ec_xxx */
+	unsigned long flags;		/* per CPU flags */
+	unsigned long capacity;		/* cpu capacity for scheduler */
+	signed char state;		/* physical cpu state */
+	signed char polarization;	/* physical polarization */
+	u16 address;			/* physical cpu address */
+};
+
+DECLARE_PER_CPU(struct pcpu, pcpu_devices);
 
 typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
 
-static inline void set_cpu_flag(int flag)
+static __always_inline struct pcpu *this_pcpu(void)
+{
+	return (struct pcpu *)(get_lowcore()->pcpu);
+}
+
+static __always_inline void set_cpu_flag(int flag)
+{
+	set_bit(flag, &this_pcpu()->flags);
+}
+
+static __always_inline void clear_cpu_flag(int flag)
+{
+	clear_bit(flag, &this_pcpu()->flags);
+}
+
+static __always_inline bool test_cpu_flag(int flag)
 {
-	S390_lowcore.cpu_flags |= (1UL << flag);
+	return test_bit(flag, &this_pcpu()->flags);
 }
 
-static inline void clear_cpu_flag(int flag)
+static __always_inline bool test_and_set_cpu_flag(int flag)
 {
-	S390_lowcore.cpu_flags &= ~(1UL << flag);
+	return test_and_set_bit(flag, &this_pcpu()->flags);
 }
 
-static inline int test_cpu_flag(int flag)
+static __always_inline bool test_and_clear_cpu_flag(int flag)
 {
-	return !!(S390_lowcore.cpu_flags & (1UL << flag));
+	return test_and_clear_bit(flag, &this_pcpu()->flags);
 }
 
 /*
  * Test CIF flag of another CPU. The caller needs to ensure that
  * CPU hotplug can not happen, e.g. by disabling preemption.
  */
-static inline int test_cpu_flag_of(int flag, int cpu)
+static __always_inline bool test_cpu_flag_of(int flag, int cpu)
 {
-	struct lowcore *lc = lowcore_ptr[cpu];
-	return !!(lc->cpu_flags & (1UL << flag));
+	return test_bit(flag, &per_cpu(pcpu_devices, cpu).flags);
 }
 
 #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -76,13 +102,21 @@ static inline void get_cpu_id(struct cpuid *ptr)
 	asm volatile("stidp %0" : "=Q" (*ptr));
 }
 
+static __always_inline unsigned long get_cpu_timer(void)
+{
+	unsigned long timer;
+
+	asm volatile("stpt	%[timer]" : [timer] "=Q" (timer));
+	return timer;
+}
+
 void s390_adjust_jiffies(void);
 void s390_update_cpu_mhz(void);
 void cpu_detect_mhz_feature(void);
 
 extern const struct seq_operations cpuinfo_op;
 extern void execve_tail(void);
-extern void __bpon(void);
+unsigned long vdso_text_size(void);
 unsigned long vdso_size(void);
 
 /*
@@ -102,6 +136,40 @@ unsigned long vdso_size(void);
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
+#define __stackleak_poison __stackleak_poison
+static __always_inline void __stackleak_poison(unsigned long erase_low,
+					       unsigned long erase_high,
+					       unsigned long poison)
+{
+	unsigned long tmp, count;
+
+	count = erase_high - erase_low;
+	if (!count)
+		return;
+	asm volatile(
+		"	cghi	%[count],8\n"
+		"	je	2f\n"
+		"	aghi	%[count],-(8+1)\n"
+		"	srlg	%[tmp],%[count],8\n"
+		"	ltgr	%[tmp],%[tmp]\n"
+		"	jz	1f\n"
+		"0:	stg	%[poison],0(%[addr])\n"
+		"	mvc	8(256-8,%[addr]),0(%[addr])\n"
+		"	la	%[addr],256(%[addr])\n"
+		"	brctg	%[tmp],0b\n"
+		"1:	stg	%[poison],0(%[addr])\n"
+		"	exrl	%[count],3f\n"
+		"	j	4f\n"
+		"2:	stg	%[poison],0(%[addr])\n"
+		"	j	4f\n"
+		"3:	mvc	8(1,%[addr]),0(%[addr])\n"
+		"4:\n"
+		: [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp)
+		: [poison] "d" (poison)
+		: "memory", "cc"
+		);
+}
+
 /*
  * Thread structure
  */
@@ -114,10 +182,10 @@ struct thread_struct {
 	unsigned long hardirq_timer;		/* task cputime in hardirq context */
 	unsigned long softirq_timer;		/* task cputime in softirq context */
 	const sys_call_ptr_t *sys_call_table;	/* system call table address */
-	unsigned long gmap_addr;		/* address of last gmap fault. */
-	unsigned int gmap_write_flag;		/* gmap fault write indication */
+	union teid gmap_teid;			/* address and flags of last gmap fault */
 	unsigned int gmap_int_code;		/* int code of last gmap fault */
-	unsigned int gmap_pfault;		/* signal of a pending guest pfault */
+	int ufpu_flags;				/* user fpu flags */
+	int kfpu_flags;				/* kernel fpu flags */
 
 	/* Per-thread information related to debugging */
 	struct per_regs per_user;		/* User specified PER registers */
@@ -133,11 +201,8 @@ struct thread_struct {
 	struct gs_cb *gs_cb;			/* Current guarded storage cb */
 	struct gs_cb *gs_bc_cb;			/* Broadcast guarded storage cb */
 	struct pgm_tdb trap_tdb;		/* Transaction abort diagnose block */
-	/*
-	 * Warning: 'fpu' is dynamically-sized. It *MUST* be at
-	 * the end.
-	 */
-	struct fpu fpu;			/* FP and VX register save area */
+	struct fpu ufpu;			/* User FP and VX register save area */
+	struct fpu kfpu;			/* Kernel FP and VX register save area */
 };
 
 /* Flag to disable transactions. */
@@ -156,7 +221,6 @@ typedef struct thread_struct thread_struct;
 
 #define INIT_THREAD {							\
 	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
-	.fpu.regs = (void *) init_task.thread.fpu.fprs,			\
 	.last_break = 1,						\
 }
 
@@ -177,7 +241,6 @@ typedef struct thread_struct thread_struct;
 	execve_tail();							\
 } while (0)
 
-/* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
 struct seq_file;
@@ -186,9 +249,6 @@ struct pt_regs;
 void show_registers(struct pt_regs *regs);
 void show_cacheinfo(struct seq_file *m);
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *tsk) { }
-
 /* Free guarded storage control block */
 void guarded_storage_release(struct task_struct *tsk);
 void gs_load_bc_cb(struct pt_regs *regs);
@@ -202,7 +262,23 @@ unsigned long __get_wchan(struct task_struct *p);
 /* Has task runtime instrumentation enabled ? */
 #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
 
-register unsigned long current_stack_pointer asm("r15");
+/* avoid using global register due to gcc bug in versions < 8.4 */
+#define current_stack_pointer (__current_stack_pointer())
+
+static __always_inline unsigned long __current_stack_pointer(void)
+{
+	unsigned long sp;
+
+	asm volatile("lgr %0,15" : "=d" (sp));
+	return sp;
+}
+
+static __always_inline bool on_thread_stack(void)
+{
+	unsigned long ksp = get_lowcore()->kernel_stack;
+
+	return !((ksp ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
+}
 
 static __always_inline unsigned short stap(void)
 {
@@ -244,8 +320,8 @@ static inline void __load_psw(psw_t psw)
  */
 static __always_inline void __load_psw_mask(unsigned long mask)
 {
+	psw_t psw __uninitialized;
 	unsigned long addr;
-	psw_t psw;
 
 	psw.mask = mask;
 
@@ -268,14 +344,36 @@ static inline unsigned long __extract_psw(void)
 	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
 }
 
-static inline void local_mcck_enable(void)
+static inline unsigned long __local_mcck_save(void)
 {
-	__load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+	unsigned long mask = __extract_psw();
+
+	__load_psw_mask(mask & ~PSW_MASK_MCHECK);
+	return mask & PSW_MASK_MCHECK;
+}
+
+#define local_mcck_save(mflags)			\
+do {						\
+	typecheck(unsigned long, mflags);	\
+	mflags = __local_mcck_save();		\
+} while (0)
+
+static inline void local_mcck_restore(unsigned long mflags)
+{
+	unsigned long mask = __extract_psw();
+
+	mask &= ~PSW_MASK_MCHECK;
+	__load_psw_mask(mask | mflags);
 }
 
 static inline void local_mcck_disable(void)
 {
-	__load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+	__local_mcck_save();
+}
+
+static inline void local_mcck_enable(void)
+{
+	__load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
 }
 
 /*
@@ -306,31 +404,20 @@ static __always_inline void __noreturn disabled_wait(void)
 
 #define ARCH_LOW_ADDRESS_LIMIT	0x7fffffffUL
 
-extern int memcpy_real(void *, unsigned long, size_t);
-extern void memcpy_absolute(void *, void *, size_t);
-
-#define put_abs_lowcore(member, x) do {					\
-	unsigned long __abs_address = offsetof(struct lowcore, member);	\
-	__typeof__(((struct lowcore *)0)->member) __tmp = (x);		\
-									\
-	memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp));	\
-} while (0)
-
-#define get_abs_lowcore(x, member) do {					\
-	unsigned long __abs_address = offsetof(struct lowcore, member);	\
-	__typeof__(((struct lowcore *)0)->member) *__ptr = &(x);	\
-									\
-	memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr));	\
-} while (0)
-
-extern int s390_isolate_bp(void);
-extern int s390_isolate_bp_guest(void);
-
 static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
 {
 	return arch_irqs_disabled_flags(regs->psw.mask);
 }
 
+static __always_inline void bpon(void)
+{
+	asm_inline volatile(
+		ALTERNATIVE("	nop\n",
+			    "	.insn	rrf,0xb2e80000,0,0,13,0\n",
+			    ALT_SPEC(82))
+		);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h
deleted file mode 100644
index f960b2896606..000000000000
--- a/arch/s390/include/asm/ptdump.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _ASM_S390_PTDUMP_H
-#define _ASM_S390_PTDUMP_H
-
-void ptdump_check_wx(void);
-
-static inline void debug_checkwx(void)
-{
-	if (IS_ENABLED(CONFIG_DEBUG_WX))
-		ptdump_check_wx();
-}
-
-#endif /* _ASM_S390_PTDUMP_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 8bae33ab320a..62c0ab4a4b9d 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -9,28 +9,53 @@
 
 #include <linux/bits.h>
 #include <uapi/asm/ptrace.h>
+#include <asm/thread_info.h>
 #include <asm/tpi.h>
 
 #define PIF_SYSCALL			0	/* inside a system call */
-#define PIF_EXECVE_PGSTE_RESTART	1	/* restart execve for PGSTE binaries */
 #define PIF_SYSCALL_RET_SET		2	/* return value was set via ptrace */
 #define PIF_GUEST_FAULT			3	/* indicates program check in sie64a */
 #define PIF_FTRACE_FULL_REGS		4	/* all register contents valid (ftrace) */
 
 #define _PIF_SYSCALL			BIT(PIF_SYSCALL)
-#define _PIF_EXECVE_PGSTE_RESTART	BIT(PIF_EXECVE_PGSTE_RESTART)
 #define _PIF_SYSCALL_RET_SET		BIT(PIF_SYSCALL_RET_SET)
 #define _PIF_GUEST_FAULT		BIT(PIF_GUEST_FAULT)
 #define _PIF_FTRACE_FULL_REGS		BIT(PIF_FTRACE_FULL_REGS)
 
-#ifndef __ASSEMBLY__
+#define PSW32_MASK_PER		_AC(0x40000000, UL)
+#define PSW32_MASK_DAT		_AC(0x04000000, UL)
+#define PSW32_MASK_IO		_AC(0x02000000, UL)
+#define PSW32_MASK_EXT		_AC(0x01000000, UL)
+#define PSW32_MASK_KEY		_AC(0x00F00000, UL)
+#define PSW32_MASK_BASE		_AC(0x00080000, UL)	/* Always one */
+#define PSW32_MASK_MCHECK	_AC(0x00040000, UL)
+#define PSW32_MASK_WAIT		_AC(0x00020000, UL)
+#define PSW32_MASK_PSTATE	_AC(0x00010000, UL)
+#define PSW32_MASK_ASC		_AC(0x0000C000, UL)
+#define PSW32_MASK_CC		_AC(0x00003000, UL)
+#define PSW32_MASK_PM		_AC(0x00000f00, UL)
+#define PSW32_MASK_RI		_AC(0x00000080, UL)
+
+#define PSW32_ADDR_AMODE	_AC(0x80000000, UL)
+#define PSW32_ADDR_INSN		_AC(0x7FFFFFFF, UL)
+
+#define PSW32_DEFAULT_KEY	((PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY	_AC(0x00000000, UL)
+#define PSW32_ASC_ACCREG	_AC(0x00004000, UL)
+#define PSW32_ASC_SECONDARY	_AC(0x00008000, UL)
+#define PSW32_ASC_HOME		_AC(0x0000C000, UL)
+
+#define PSW_DEFAULT_KEY			((PAGE_DEFAULT_ACC) << 52)
 
 #define PSW_KERNEL_BITS	(PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
-			 PSW_MASK_EA | PSW_MASK_BA)
+			 PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_DAT)
 #define PSW_USER_BITS	(PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
 			 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
 			 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
 
+#ifndef __ASSEMBLY__
+
 struct psw_bits {
 	unsigned long	     :	1;
 	unsigned long per    :	1; /* PER-Mask */
@@ -71,30 +96,6 @@ enum {
 	&(*(struct psw_bits *)(&(__psw)));	\
 }))
 
-#define PSW32_MASK_PER		0x40000000UL
-#define PSW32_MASK_DAT		0x04000000UL
-#define PSW32_MASK_IO		0x02000000UL
-#define PSW32_MASK_EXT		0x01000000UL
-#define PSW32_MASK_KEY		0x00F00000UL
-#define PSW32_MASK_BASE		0x00080000UL	/* Always one */
-#define PSW32_MASK_MCHECK	0x00040000UL
-#define PSW32_MASK_WAIT		0x00020000UL
-#define PSW32_MASK_PSTATE	0x00010000UL
-#define PSW32_MASK_ASC		0x0000C000UL
-#define PSW32_MASK_CC		0x00003000UL
-#define PSW32_MASK_PM		0x00000f00UL
-#define PSW32_MASK_RI		0x00000080UL
-
-#define PSW32_ADDR_AMODE	0x80000000UL
-#define PSW32_ADDR_INSN		0x7FFFFFFFUL
-
-#define PSW32_DEFAULT_KEY	(((u32)PAGE_DEFAULT_ACC) << 20)
-
-#define PSW32_ASC_PRIMARY	0x00000000UL
-#define PSW32_ASC_ACCREG	0x00004000UL
-#define PSW32_ASC_SECONDARY	0x00008000UL
-#define PSW32_ASC_HOME		0x0000C000UL
-
 typedef struct {
 	unsigned int mask;
 	unsigned int addr;
@@ -126,7 +127,6 @@ struct pt_regs {
 		struct tpi_info tpi_info;
 	};
 	unsigned long flags;
-	unsigned long cr1;
 	unsigned long last_break;
 };
 
@@ -201,6 +201,10 @@ static inline int test_and_clear_pt_regs_flag(struct pt_regs *regs, int flag)
 	return ret;
 }
 
+struct task_struct;
+
+void update_cr_regs(struct task_struct *task);
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
@@ -225,8 +229,44 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
 
 int regs_query_register_offset(const char *name);
 const char *regs_query_register_name(unsigned int offset);
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->gprs[15];
+}
+
+static __always_inline unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+	if (offset >= NUM_GPRS)
+		return 0;
+	return regs->gprs[offset];
+}
+
+static __always_inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	unsigned long ksp = kernel_stack_pointer(regs);
+
+	return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the contents of the @n th entry of the
+ * kernel stack which is specified by @regs. If the @n th entry is NOT in the
+ * kernel stack, this returns 0.
+ */
+static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long addr;
+
+	addr = kernel_stack_pointer(regs) + n * sizeof(long);
+	if (!regs_within_kernel_stack(regs, addr))
+		return 0;
+	return READ_ONCE_NOCHECK(*(unsigned long *)addr);
+}
 
 /**
  * regs_get_kernel_argument() - get Nth function argument in kernel
@@ -247,11 +287,6 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
 	return regs_get_kernel_stack_nth(regs, argoffset + n);
 }
 
-static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
-	return regs->gprs[15];
-}
-
 static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 {
 	regs->gprs[2] = rc;
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 2f983e0b95e0..69c4ead0c332 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -9,8 +9,9 @@
 #define __QDIO_H__
 
 #include <linux/interrupt.h>
-#include <asm/cio.h>
+#include <asm/dma-types.h>
 #include <asm/ccwdev.h>
+#include <asm/cio.h>
 
 /* only use 4 queues to save some cachelines */
 #define QDIO_MAX_QUEUES_PER_IRQ		4
@@ -34,9 +35,9 @@
  * @dkey: access key for SLSB
  */
 struct qdesfmt0 {
-	u64 sliba;
-	u64 sla;
-	u64 slsba;
+	dma64_t sliba;
+	dma64_t sla;
+	dma64_t slsba;
 	u32	 : 32;
 	u32 akey : 4;
 	u32 bkey : 4;
@@ -74,7 +75,7 @@ struct qdr {
 	/* private: */
 	u32 res[9];
 	/* public: */
-	u64 qiba;
+	dma64_t qiba;
 	u32	   : 32;
 	u32 qkey   : 4;
 	u32	   : 28;
@@ -146,7 +147,7 @@ struct qaob {
 	u8 flags;
 	u16 cbtbs;
 	u8 sb_count;
-	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	dma64_t sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u64 user0;
 	u64 res4[2];
@@ -208,7 +209,7 @@ struct qdio_buffer_element {
 	u8 scount;
 	u8 sflags;
 	u32 length;
-	u64 addr;
+	dma64_t addr;
 } __attribute__ ((packed, aligned(16)));
 
 /**
@@ -224,7 +225,7 @@ struct qdio_buffer {
  * @sbal: absolute SBAL address
  */
 struct sl_element {
-	u64 sbal;
+	dma64_t sbal;
 } __attribute__ ((packed));
 
 /**
diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h
new file mode 100644
index 000000000000..17878b1d048c
--- /dev/null
+++ b/arch/s390/include/asm/runtime-const.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_RUNTIME_CONST_H
+#define _ASM_S390_RUNTIME_CONST_H
+
+#include <linux/uaccess.h>
+
+#define runtime_const_ptr(sym)					\
+({								\
+	typeof(sym) __ret;					\
+								\
+	asm_inline(						\
+		"0:	iihf	%[__ret],%[c1]\n"		\
+		"	iilf	%[__ret],%[c2]\n"		\
+		".pushsection runtime_ptr_" #sym ",\"a\"\n"	\
+		".long 0b - .\n"				\
+		".popsection"					\
+		: [__ret] "=d" (__ret)				\
+		: [c1] "i" (0x01234567UL),			\
+		  [c2] "i" (0x89abcdefUL));			\
+	__ret;							\
+})
+
+#define runtime_const_shift_right_32(val, sym)			\
+({								\
+	unsigned int __ret = (val);				\
+								\
+	asm_inline(						\
+		"0:	srl	%[__ret],12\n"			\
+		".pushsection runtime_shift_" #sym ",\"a\"\n"	\
+		".long 0b - .\n"				\
+		".popsection"					\
+		: [__ret] "+d" (__ret));			\
+	__ret;							\
+})
+
+#define runtime_const_init(type, sym) do {			\
+	extern s32 __start_runtime_##type##_##sym[];		\
+	extern s32 __stop_runtime_##type##_##sym[];		\
+								\
+	runtime_const_fixup(__runtime_fixup_##type,		\
+			    (unsigned long)(sym),		\
+			    __start_runtime_##type##_##sym,	\
+			    __stop_runtime_##type##_##sym);	\
+} while (0)
+
+/* The 32-bit immediate of iihf and iilf is located in the I2 field */
+static inline void __runtime_fixup_32(u32 *p, unsigned int val)
+{
+	s390_kernel_write(p, &val, sizeof(val));
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+	__runtime_fixup_32(where + 2, val >> 32);
+	__runtime_fixup_32(where + 8, val);
+}
+
+/* Immediate value is lower 12 bits of D2 field of srl */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+	u32 insn = *(u32 *)where;
+
+	insn &= 0xfffff000;
+	insn |= (val & 63);
+	s390_kernel_write(where, &insn, sizeof(insn));
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+				       unsigned long val, s32 *start, s32 *end)
+{
+	while (start < end) {
+		fn(*start + (void *)start, val);
+		start++;
+	}
+}
+
+#endif /* _ASM_S390_RUNTIME_CONST_H */
diff --git a/arch/s390/include/asm/rwonce.h b/arch/s390/include/asm/rwonce.h
new file mode 100644
index 000000000000..91fc24520e82
--- /dev/null
+++ b/arch/s390/include/asm/rwonce.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_S390_RWONCE_H
+#define __ASM_S390_RWONCE_H
+
+#include <linux/compiler_types.h>
+
+/*
+ * Use READ_ONCE_ALIGNED_128() for 128-bit block concurrent (atomic) read
+ * accesses. Note that x must be 128-bit aligned, otherwise a specification
+ * exception is generated.
+ */
+#define READ_ONCE_ALIGNED_128(x)			\
+({							\
+	union {						\
+		typeof(x) __x;				\
+		__uint128_t val;			\
+	} __u;						\
+							\
+	BUILD_BUG_ON(sizeof(x) != 16);			\
+	asm volatile(					\
+		"	lpq	%[val],%[_x]\n"		\
+		: [val] "=d" (__u.val)			\
+		: [_x] "QS" (x)				\
+		: "memory");				\
+	__u.__x;					\
+})
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_S390_RWONCE_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 236b34b75ddb..1e62919bacf4 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -16,7 +16,13 @@
 /* 24 + 16 * SCLP_MAX_CORES */
 #define EXT_SCCB_READ_CPU	(3 * PAGE_SIZE)
 
+#define SCLP_ERRNOTIFY_AQ_RESET			0
+#define SCLP_ERRNOTIFY_AQ_REPAIR		1
+#define SCLP_ERRNOTIFY_AQ_INFO_LOG		2
+#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA		3
+
 #ifndef __ASSEMBLY__
+#include <linux/uio.h>
 #include <asm/chpid.h>
 #include <asm/cpu.h>
 
@@ -71,6 +77,7 @@ struct sclp_info {
 	unsigned char has_core_type : 1;
 	unsigned char has_sprp : 1;
 	unsigned char has_hvs : 1;
+	unsigned char has_wti : 1;
 	unsigned char has_esca : 1;
 	unsigned char has_sief2 : 1;
 	unsigned char has_64bscao : 1;
@@ -83,11 +90,20 @@ struct sclp_info {
 	unsigned char has_ibs : 1;
 	unsigned char has_skey : 1;
 	unsigned char has_kss : 1;
+	unsigned char has_diag204_bif : 1;
 	unsigned char has_gisaf : 1;
+	unsigned char has_diag310 : 1;
 	unsigned char has_diag318 : 1;
+	unsigned char has_diag320 : 1;
+	unsigned char has_diag324 : 1;
 	unsigned char has_sipl : 1;
+	unsigned char has_sipl_eckd : 1;
 	unsigned char has_dirq : 1;
 	unsigned char has_iplcc : 1;
+	unsigned char has_zpci_lsi : 1;
+	unsigned char has_aisii : 1;
+	unsigned char has_aeni : 1;
+	unsigned char has_aisi : 1;
 	unsigned int ibc;
 	unsigned int mtid;
 	unsigned int mtid_cp;
@@ -102,6 +118,34 @@ struct sclp_info {
 };
 extern struct sclp_info sclp;
 
+struct sccb_header {
+	u16	length;
+	u8	function_code;
+	u8	control_mask[3];
+	u16	response_code;
+} __packed;
+
+struct evbuf_header {
+	u16	length;
+	u8	type;
+	u8	flags;
+	u16	_reserved;
+} __packed;
+
+struct err_notify_evbuf {
+	struct evbuf_header header;
+	u8 action;
+	u8 atype;
+	u32 fh;
+	u32 fid;
+	u8 data[];
+} __packed;
+
+struct err_notify_sccb {
+	struct sccb_header header;
+	struct err_notify_evbuf evbuf;
+} __packed;
+
 struct zpci_report_error_header {
 	u8 version;	/* Interface version byte */
 	u8 action;	/* Action qualifier byte
@@ -124,9 +168,12 @@ int sclp_early_read_storage_info(void);
 int sclp_early_get_core_info(struct sclp_core_info *info);
 void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
 void sclp_early_detect(void);
+void sclp_early_detect_machine_features(void);
 void sclp_early_printk(const char *s);
 void __sclp_early_printk(const char *s, unsigned int len);
+void sclp_emergency_printk(const char *s);
 
+int sclp_init(void);
 int sclp_early_get_memsize(unsigned long *mem);
 int sclp_early_get_hsa_size(unsigned long *hsa_size);
 int _sclp_get_core_info(struct sclp_core_info *info);
@@ -142,8 +189,7 @@ int sclp_pci_deconfigure(u32 fid);
 int sclp_ap_configure(u32 apid);
 int sclp_ap_deconfigure(u32 apid);
 int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
-int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
 void sclp_ocf_cpc_name_copy(char *dst);
 
 static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 7ce584aff5bb..56003e26cdbf 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <asm/css_chars.h>
+#include <asm/dma-types.h>
 #include <asm/cio.h>
 
 /**
@@ -53,7 +54,7 @@ struct cmd_scsw {
 	__u32 fctl : 3;
 	__u32 actl : 7;
 	__u32 stctl : 5;
-	__u32 cpa;
+	dma32_t cpa;
 	__u32 dstat : 8;
 	__u32 cstat : 8;
 	__u32 count : 16;
@@ -93,7 +94,7 @@ struct tm_scsw {
 	u32 fctl:3;
 	u32 actl:7;
 	u32 stctl:5;
-	u32 tcw;
+	dma32_t tcw;
 	u32 dstat:8;
 	u32 cstat:8;
 	u32 fcxs:8;
@@ -125,7 +126,7 @@ struct eadm_scsw {
 	u32 fctl:3;
 	u32 actl:7;
 	u32 stctl:5;
-	u32 aob;
+	dma32_t aob;
 	u32 dstat:8;
 	u32 cstat:8;
 	u32:16;
@@ -215,6 +216,11 @@ union scsw {
 #define SNS2_ENV_DATA_PRESENT	0x10
 #define SNS2_INPRECISE_END	0x04
 
+/*
+ * architectured values for PPRC errors
+ */
+#define SNS7_INVALID_ON_SEC	0x0e
+
 /**
  * scsw_is_tm - check for transport mode scsw
  * @scsw: pointer to scsw
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 3fecaa4e8b74..0486e6ef62bf 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -23,7 +23,7 @@
  */
 #define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
 
-extern unsigned long __samode31, __eamode31;
-extern unsigned long __stext_amode31, __etext_amode31;
+extern char *__samode31, *__eamode31;
+extern char *__stext_amode31, *__etext_amode31;
 
 #endif
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
deleted file mode 100644
index aaf85a69061c..000000000000
--- a/arch/s390/include/asm/serial.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_SERIAL_H
-#define _ASM_S390_SERIAL_H
-
-#define BASE_BAUD 0
-
-#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index 950d87bd997a..94092f4ae764 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -6,37 +6,63 @@
 
 extern struct mutex cpa_mutex;
 
-#define SET_MEMORY_RO	1UL
-#define SET_MEMORY_RW	2UL
-#define SET_MEMORY_NX	4UL
-#define SET_MEMORY_X	8UL
-#define SET_MEMORY_4K  16UL
+enum {
+	_SET_MEMORY_RO_BIT,
+	_SET_MEMORY_RW_BIT,
+	_SET_MEMORY_NX_BIT,
+	_SET_MEMORY_X_BIT,
+	_SET_MEMORY_4K_BIT,
+	_SET_MEMORY_INV_BIT,
+	_SET_MEMORY_DEF_BIT,
+};
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags);
+#define SET_MEMORY_RO	BIT(_SET_MEMORY_RO_BIT)
+#define SET_MEMORY_RW	BIT(_SET_MEMORY_RW_BIT)
+#define SET_MEMORY_NX	BIT(_SET_MEMORY_NX_BIT)
+#define SET_MEMORY_X	BIT(_SET_MEMORY_X_BIT)
+#define SET_MEMORY_4K	BIT(_SET_MEMORY_4K_BIT)
+#define SET_MEMORY_INV	BIT(_SET_MEMORY_INV_BIT)
+#define SET_MEMORY_DEF	BIT(_SET_MEMORY_DEF_BIT)
 
-static inline int set_memory_ro(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_RO);
-}
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags);
 
-static inline int set_memory_rw(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_RW);
-}
+#define set_memory_rox set_memory_rox
 
-static inline int set_memory_nx(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_NX);
+/*
+ * Generate two variants of each set_memory() function:
+ *
+ * set_memory_yy(unsigned long addr, int numpages);
+ * __set_memory_yy(void *start, void *end);
+ *
+ * The second variant exists both for convenience, avoiding the usual
+ * (unsigned long) casts, and because, unlike the first variant, it can be
+ * used for areas larger than 8TB, which may happen at memory initialization.
+ */
+#define __SET_MEMORY_FUNC(fname, flags)					\
+static inline int fname(unsigned long addr, int numpages)		\
+{									\
+	return __set_memory(addr, numpages, (flags));			\
+}									\
+									\
+static inline int __##fname(void *start, void *end)			\
+{									\
+	unsigned long numpages;						\
+									\
+	numpages = (end - start) >> PAGE_SHIFT;				\
+	return __set_memory((unsigned long)start, numpages, (flags));	\
 }
 
-static inline int set_memory_x(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_X);
-}
+__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO)
+__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW)
+__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
 
-static inline int set_memory_4k(unsigned long addr, int numpages)
-{
-	return __set_memory(addr, numpages, SET_MEMORY_4K);
-}
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
+bool kernel_page_present(struct page *page);
 
 #endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 77e6506898f5..031e881b4d88 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -13,27 +13,6 @@
 #define PARMAREA		0x10400
 
 #define COMMAND_LINE_SIZE CONFIG_COMMAND_LINE_SIZE
-/*
- * Machine features detected in early.c
- */
-
-#define MACHINE_FLAG_VM		BIT(0)
-#define MACHINE_FLAG_KVM	BIT(1)
-#define MACHINE_FLAG_LPAR	BIT(2)
-#define MACHINE_FLAG_DIAG9C	BIT(3)
-#define MACHINE_FLAG_ESOP	BIT(4)
-#define MACHINE_FLAG_IDTE	BIT(5)
-#define MACHINE_FLAG_EDAT1	BIT(7)
-#define MACHINE_FLAG_EDAT2	BIT(8)
-#define MACHINE_FLAG_TOPOLOGY	BIT(10)
-#define MACHINE_FLAG_TE		BIT(11)
-#define MACHINE_FLAG_TLB_LC	BIT(12)
-#define MACHINE_FLAG_VX		BIT(13)
-#define MACHINE_FLAG_TLB_GUEST	BIT(14)
-#define MACHINE_FLAG_NX		BIT(15)
-#define MACHINE_FLAG_GS		BIT(16)
-#define MACHINE_FLAG_SCC	BIT(17)
-#define MACHINE_FLAG_PCI_MIO	BIT(18)
 
 #define LPP_MAGIC		BIT(31)
 #define LPP_PID_MASK		_AC(0xffffffff, UL)
@@ -71,31 +50,12 @@ extern unsigned int zlib_dfltcc_support;
 #define ZLIB_DFLTCC_INFLATE_ONLY	3
 #define ZLIB_DFLTCC_FULL_DEBUG		4
 
-extern int noexec_disabled;
 extern unsigned long ident_map_size;
+extern unsigned long max_mappable;
 
 /* The Write Back bit position in the physaddr is given by the SLPC PCI */
 extern unsigned long mio_wb_bit_mask;
 
-#define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
-#define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
-#define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
-
-#define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
-#define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
-#define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
-#define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
-#define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
-#define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
-#define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
-#define MACHINE_HAS_TLB_GUEST	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
-#define MACHINE_HAS_NX		(S390_lowcore.machine_flags & MACHINE_FLAG_NX)
-#define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
-#define MACHINE_HAS_SCC		(S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
-#define MACHINE_HAS_PCI_MIO	(S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
-
 /*
  * Console mode. Override with conmode=
  */
@@ -115,13 +75,7 @@ extern unsigned int console_irq;
 #define SET_CONSOLE_VT220	do { console_mode = 4; } while (0)
 #define SET_CONSOLE_HVC		do { console_mode = 5; } while (0)
 
-#ifdef CONFIG_PFAULT
-extern int pfault_init(void);
-extern void pfault_fini(void);
-#else /* CONFIG_PFAULT */
-#define pfault_init()		({-1;})
-#define pfault_fini()		do { } while (0)
-#endif /* CONFIG_PFAULT */
+void register_early_console(void);
 
 #ifdef CONFIG_VMCP
 void vmcp_cma_reserve(void);
@@ -131,34 +85,17 @@ static inline void vmcp_cma_reserve(void) { }
 
 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
 
-void cmma_init(void);
-void cmma_init_nodat(void);
-
 extern void (*_machine_restart)(char *command);
 extern void (*_machine_halt)(void);
 extern void (*_machine_power_off)(void);
 
-extern unsigned long __kaslr_offset;
-static inline unsigned long kaslr_offset(void)
-{
-	return __kaslr_offset;
-}
-
-extern int is_full_image;
-
-struct initrd_data {
-	unsigned long start;
-	unsigned long size;
-};
-extern struct initrd_data initrd_data;
-
 struct oldmem_data {
 	unsigned long start;
 	unsigned long size;
 };
 extern struct oldmem_data oldmem_data;
 
-static inline u32 gen_lpswe(unsigned long addr)
+static __always_inline u32 gen_lpswe(unsigned long addr)
 {
 	BUILD_BUG_ON(addr > 0xfff);
 	return 0xb2b20000 | addr;
diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h
deleted file mode 100644
index e75d45649c54..000000000000
--- a/arch/s390/include/asm/shmparam.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/shmparam.h"
- */
-#ifndef _ASM_S390_SHMPARAM_H
-#define _ASM_S390_SHMPARAM_H
-
-#define SHMLBA PAGE_SIZE                 /* attach addr a multiple of this */
-
-#endif /* _ASM_S390_SHMPARAM_H */
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index edee63da08e7..472943b77066 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -38,6 +38,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/asm.h>
+
 static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
 				u32 *status)
 {
@@ -46,13 +48,12 @@ static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm,
 
 	asm volatile(
 		"	sigp	%[r1],%[addr],0(%[order])\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=&d" (cc), [r1] "+&d" (r1.pair)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [r1] "+d" (r1.pair)
 		: [addr] "d" (addr), [order] "a" (order)
-		: "cc");
+		: CC_CLOBBER);
 	*status = r1.even;
-	return cc;
+	return CC_TRANSFORM(cc);
 }
 
 static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7f5d4763357b..03f4d01664f8 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,11 +7,30 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
-#include <asm/sigp.h>
-#include <asm/lowcore.h>
 #include <asm/processor.h>
+#include <asm/lowcore.h>
+#include <asm/machine.h>
+#include <asm/sigp.h>
+
+static __always_inline unsigned int raw_smp_processor_id(void)
+{
+	unsigned long lc_cpu_nr;
+	unsigned int cpu;
+
+	BUILD_BUG_ON(sizeof_field(struct lowcore, cpu_nr) != sizeof(cpu));
+	lc_cpu_nr = offsetof(struct lowcore, cpu_nr);
+	asm_inline(
+		ALTERNATIVE("   ly      %[cpu],%[offzero](%%r0)\n",
+			    "   ly      %[cpu],%[offalt](%%r0)\n",
+			    ALT_FEATURE(MFEATURE_LOWCORE))
+		: [cpu] "=d" (cpu)
+		: [offzero] "i" (lc_cpu_nr),
+		  [offalt] "i" (lc_cpu_nr + LOWCORE_ALT_ADDRESS),
+		  "m" (((struct lowcore *)0)->cpu_nr));
+	return cpu;
+}
 
-#define raw_smp_processor_id()	(S390_lowcore.cpu_nr)
+#define arch_scale_cpu_capacity smp_cpu_get_capacity
 
 extern struct mutex smp_cpu_state_mutex;
 extern unsigned int smp_cpu_mt_shift;
@@ -24,16 +43,19 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
-extern void smp_call_online_cpu(void (*func)(void *), void *);
 extern void smp_call_ipl_cpu(void (*func)(void *), void *);
 extern void smp_emergency_stop(void);
 
 extern int smp_find_processor_id(u16 address);
 extern int smp_store_status(int cpu);
-extern void smp_save_dump_cpus(void);
+extern void smp_save_dump_ipl_cpu(void);
+extern void smp_save_dump_secondary_cpus(void);
 extern void smp_yield_cpu(int cpu);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
+extern void smp_cpu_set_capacity(int cpu, unsigned long val);
+extern void smp_set_core_capacity(int cpu, unsigned long val);
+extern unsigned long smp_cpu_get_capacity(int cpu);
 extern int smp_cpu_get_cpu_address(int cpu);
 extern void smp_fill_possible_mask(void);
 extern void smp_detect_cpus(void);
@@ -58,7 +80,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
 {
 }
 
-extern int smp_rescan_cpus(void);
+extern int smp_rescan_cpus(bool early);
 extern void __noreturn cpu_die(void);
 extern void __cpu_die(unsigned int cpu);
 extern int __cpu_disable(void);
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
index fd17f25704bd..42d61296bbad 100644
--- a/arch/s390/include/asm/softirq_stack.h
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -5,9 +5,10 @@
 #include <asm/lowcore.h>
 #include <asm/stacktrace.h>
 
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
 static inline void do_softirq_own_stack(void)
 {
-	call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
+	call_on_stack(0, get_lowcore()->async_stack, void, __do_softirq);
 }
-
+#endif
 #endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
index c549893602ea..668dfc5de538 100644
--- a/arch/s390/include/asm/sparsemem.h
+++ b/arch/s390/include/asm/sparsemem.h
@@ -2,7 +2,23 @@
 #ifndef _ASM_S390_SPARSEMEM_H
 #define _ASM_S390_SPARSEMEM_H
 
-#define SECTION_SIZE_BITS	28
+#define SECTION_SIZE_BITS	27
 #define MAX_PHYSMEM_BITS	CONFIG_MAX_PHYSMEM_BITS
 
+#ifdef CONFIG_NUMA
+
+static inline int memory_add_physaddr_to_nid(u64 addr)
+{
+	return 0;
+}
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+
+static inline int phys_to_target_node(u64 start)
+{
+	return 0;
+}
+#define phys_to_target_node phys_to_target_node
+
+#endif /* CONFIG_NUMA */
+
 #endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 37127cd7749e..f9935db9fd76 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -16,7 +16,23 @@
 #include <asm/processor.h>
 #include <asm/alternative.h>
 
-#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+static __always_inline unsigned int spinlock_lockval(void)
+{
+	unsigned long lc_lockval;
+	unsigned int lockval;
+
+	BUILD_BUG_ON(sizeof_field(struct lowcore, spinlock_lockval) != sizeof(lockval));
+	lc_lockval = offsetof(struct lowcore, spinlock_lockval);
+	asm_inline(
+		ALTERNATIVE("   ly      %[lockval],%[offzero](%%r0)\n",
+			    "   ly      %[lockval],%[offalt](%%r0)\n",
+			    ALT_FEATURE(MFEATURE_LOWCORE))
+		: [lockval] "=d" (lockval)
+		: [offzero] "i" (lc_lockval),
+		  [offalt] "i" (lc_lockval + LOWCORE_ALT_ADDRESS),
+		  "m" (((struct lowcore *)0)->spinlock_lockval));
+	return lockval;
+}
 
 extern int spin_retry;
 
@@ -57,8 +73,10 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lp)
 
 static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 {
+	int old = 0;
+
 	barrier();
-	return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
+	return likely(arch_try_cmpxchg(&lp->lock, &old, spinlock_lockval()));
 }
 
 static inline void arch_spin_lock(arch_spinlock_t *lp)
@@ -79,10 +97,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 	typecheck(int, lp->lock);
 	kcsan_release();
 	asm_inline volatile(
-		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
-		"	sth	%1,%0\n"
-		: "=R" (((unsigned short *) &lp->lock)[1])
-		: "d" (0) : "cc", "memory");
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
+		"	mvhhi	%[lock],0\n"
+		: [lock] "=Q" (((unsigned short *)&lp->lock)[1])
+		:
+		: "memory");
 }
 
 /*
@@ -118,7 +137,9 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	if (!__atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000))
+	int old = 0;
+
+	if (!arch_try_cmpxchg(&rw->cnts, &old, 0x30000))
 		arch_write_lock_wait(rw);
 }
 
@@ -133,8 +154,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	int old;
 
 	old = READ_ONCE(rw->cnts);
-	return (!(old & 0xffff0000) &&
-		__atomic_cmpxchg_bool(&rw->cnts, old, old + 1));
+	return (!(old & 0xffff0000) && arch_try_cmpxchg(&rw->cnts, &old, old + 1));
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *rw)
@@ -142,7 +162,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	int old;
 
 	old = READ_ONCE(rw->cnts);
-	return !old && __atomic_cmpxchg_bool(&rw->cnts, 0, 0x30000);
+	return !old && arch_try_cmpxchg(&rw->cnts, &old, 0x30000);
 }
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index b69695e39957..3653ff57d6d9 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -3,7 +3,7 @@
 #define __ASM_SPINLOCK_TYPES_H
 
 #ifndef __LINUX_SPINLOCK_TYPES_RAW_H
-# error "please don't include this file directly"
+# error "Please do not include this file directly."
 #endif
 
 typedef struct {
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index b23c658dce77..1d5ca13dc90f 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -2,9 +2,27 @@
 #ifndef _ASM_S390_STACKTRACE_H
 #define _ASM_S390_STACKTRACE_H
 
+#include <linux/stacktrace.h>
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
-#include <asm/switch_to.h>
+
+struct stack_frame_user {
+	unsigned long back_chain;
+	unsigned long empty1[5];
+	unsigned long gprs[10];
+	unsigned long empty2[4];
+};
+
+struct stack_frame_vdso_wrapper {
+	struct stack_frame_user sf;
+	unsigned long return_address;
+};
+
+struct perf_callchain_entry_ctx;
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+				 struct perf_callchain_entry_ctx *entry,
+				 const struct pt_regs *regs, bool perf);
 
 enum stack_type {
 	STACK_TYPE_UNKNOWN,
@@ -46,6 +64,8 @@ struct stack_frame {
 			unsigned long sie_savearea;
 			unsigned long sie_reason;
 			unsigned long sie_flags;
+			unsigned long sie_control_block_phys;
+			unsigned long sie_guest_asce;
 		};
 	};
 	unsigned long gprs[10];
@@ -188,17 +208,53 @@ static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
 	(rettype)r2;							\
 })
 
-#define call_on_stack_noreturn(fn, stack)				\
+/*
+ * Use call_nodat() to call a function with DAT disabled.
+ * Proper sign and zero extension of function arguments is done.
+ * Usage:
+ *
+ * rc = call_nodat(nr, rettype, fn, t1, a1, t2, a2, ...)
+ *
+ * - nr specifies the number of function arguments of fn.
+ * - fn is the function to be called, where fn is a physical address.
+ * - rettype is the return type of fn.
+ * - t1, a1, ... are pairs, where t1 must match the type of the first
+ *   argument of fn, t2 the second, etc. a1 is the corresponding
+ *   first function argument (not name), etc.
+ *
+ * fn() is called with standard C function call ABI, with the exception
+ * that no useful stackframe or stackpointer is passed via register 15.
+ * Therefore the called function must not use r15 to access the stack.
+ */
+#define call_nodat(nr, rettype, fn, ...)				\
 ({									\
-	void (*__fn)(void) = fn;					\
+	rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = (fn);		\
+	/* aligned since psw_leave must not cross page boundary */	\
+	psw_t __aligned(16) psw_leave;					\
+	psw_t psw_enter;						\
+	CALL_LARGS_##nr(__VA_ARGS__);					\
+	CALL_REGS_##nr;							\
 									\
+	CALL_TYPECHECK_##nr(__VA_ARGS__);				\
+	psw_enter.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;		\
+	psw_enter.addr = (unsigned long)__fn;				\
 	asm volatile(							\
-		"	la	15,0(%[_stack])\n"			\
-		"	xc	%[_bc](8,15),%[_bc](15)\n"		\
-		"	brasl	14,%[_fn]\n"				\
-		::[_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
-		  [_stack] "a" (stack), [_fn] "X" (__fn));		\
-	BUG();								\
+		"	epsw	0,1\n"					\
+		"	risbg	1,0,0,31,32\n"				\
+		"	larl	7,1f\n"					\
+		"	stg	1,%[psw_leave]\n"			\
+		"	stg	7,8+%[psw_leave]\n"			\
+		"	la	7,%[psw_leave]\n"			\
+		"	lra	7,0(7)\n"				\
+		"	larl	1,0f\n"					\
+		"	lra	14,0(1)\n"				\
+		"	lpswe	%[psw_enter]\n"				\
+		"0:	lpswe	0(7)\n"					\
+		"1:\n"							\
+		: CALL_FMT_##nr, [psw_leave] "=Q" (psw_leave)		\
+		: [psw_enter] "Q" (psw_enter)				\
+		: "7", CALL_CLOBBER_##nr);				\
+	(rettype)r2;							\
 })
 
 #endif /* _ASM_S390_STACKTRACE_H */
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index 4d74d7e33340..827cb208de86 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -94,5 +94,6 @@ struct stp_stzi {
 int stp_sync_check(void);
 int stp_island_check(void);
 void stp_queue_work(void);
+bool stp_enabled(void);
 
 #endif /* __S390_STP_H */
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 3fae93ddb322..f8f68f4ef255 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -15,36 +15,33 @@
 #define __HAVE_ARCH_MEMCPY	/* gcc builtin & arch function */
 #define __HAVE_ARCH_MEMMOVE	/* gcc builtin & arch function */
 #define __HAVE_ARCH_MEMSET	/* gcc builtin & arch function */
-#define __HAVE_ARCH_MEMSET16	/* arch function */
-#define __HAVE_ARCH_MEMSET32	/* arch function */
-#define __HAVE_ARCH_MEMSET64	/* arch function */
 
 void *memcpy(void *dest, const void *src, size_t n);
 void *memset(void *s, int c, size_t n);
 void *memmove(void *dest, const void *src, size_t n);
 
-#ifndef CONFIG_KASAN
+#if !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN)
 #define __HAVE_ARCH_MEMCHR	/* inline & arch function */
 #define __HAVE_ARCH_MEMCMP	/* arch function */
 #define __HAVE_ARCH_MEMSCAN	/* inline & arch function */
 #define __HAVE_ARCH_STRCAT	/* inline & arch function */
 #define __HAVE_ARCH_STRCMP	/* arch function */
-#define __HAVE_ARCH_STRCPY	/* inline & arch function */
 #define __HAVE_ARCH_STRLCAT	/* arch function */
 #define __HAVE_ARCH_STRLEN	/* inline & arch function */
 #define __HAVE_ARCH_STRNCAT	/* arch function */
-#define __HAVE_ARCH_STRNCPY	/* arch function */
 #define __HAVE_ARCH_STRNLEN	/* inline & arch function */
 #define __HAVE_ARCH_STRSTR	/* arch function */
+#define __HAVE_ARCH_MEMSET16	/* arch function */
+#define __HAVE_ARCH_MEMSET32	/* arch function */
+#define __HAVE_ARCH_MEMSET64	/* arch function */
 
 /* Prototypes for non-inlined arch strings functions. */
 int memcmp(const void *s1, const void *s2, size_t n);
 int strcmp(const char *s1, const char *s2);
 size_t strlcat(char *dest, const char *src, size_t n);
 char *strncat(char *dest, const char *src, size_t n);
-char *strncpy(char *dest, const char *src, size_t n);
 char *strstr(const char *s1, const char *s2);
-#endif /* !CONFIG_KASAN */
+#endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */
 
 #undef __HAVE_ARCH_STRCHR
 #undef __HAVE_ARCH_STRNCHR
@@ -55,18 +52,6 @@ char *strstr(const char *s1, const char *s2);
 
 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
 
-extern void *__memcpy(void *dest, const void *src, size_t n);
-extern void *__memset(void *s, int c, size_t n);
-extern void *__memmove(void *dest, const void *src, size_t n);
-
-/*
- * For files that are not instrumented (e.g. mm/slub.c) we
- * should use not instrumented version of mem* functions.
- */
-
-#define memcpy(dst, src, len) __memcpy(dst, src, len)
-#define memmove(dst, src, len) __memmove(dst, src, len)
-#define memset(s, c, n) __memset(s, c, n)
 #define strlen(s) __strlen(s)
 
 #define __no_sanitize_prefix_strfunc(x) __##x
@@ -79,24 +64,37 @@ extern void *__memmove(void *dest, const void *src, size_t n);
 #define __no_sanitize_prefix_strfunc(x) x
 #endif /* defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) */
 
+void *__memcpy(void *dest, const void *src, size_t n);
+void *__memset(void *s, int c, size_t n);
+void *__memmove(void *dest, const void *src, size_t n);
 void *__memset16(uint16_t *s, uint16_t v, size_t count);
 void *__memset32(uint32_t *s, uint32_t v, size_t count);
 void *__memset64(uint64_t *s, uint64_t v, size_t count);
 
+#ifdef __HAVE_ARCH_MEMSET16
 static inline void *memset16(uint16_t *s, uint16_t v, size_t count)
 {
 	return __memset16(s, v, count * sizeof(v));
 }
+#endif
 
+#ifdef __HAVE_ARCH_MEMSET32
 static inline void *memset32(uint32_t *s, uint32_t v, size_t count)
 {
 	return __memset32(s, v, count * sizeof(v));
 }
+#endif
 
+#ifdef __HAVE_ARCH_MEMSET64
+#ifdef IN_BOOT_STRING_C
+void *memset64(uint64_t *s, uint64_t v, size_t count);
+#else
 static inline void *memset64(uint64_t *s, uint64_t v, size_t count)
 {
 	return __memset64(s, v, count * sizeof(v));
 }
+#endif
+#endif
 
 #if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
 
@@ -154,22 +152,6 @@ static inline char *strcat(char *dst, const char *src)
 }
 #endif
 
-#ifdef __HAVE_ARCH_STRCPY
-static inline char *strcpy(char *dst, const char *src)
-{
-	char *ret = dst;
-
-	asm volatile(
-		"	lghi	0,0\n"
-		"0:	mvst	%[dst],%[src]\n"
-		"	jo	0b"
-		: [dst] "+&a" (dst), [src] "+&a" (src)
-		:
-		: "cc", "memory", "0");
-	return ret;
-}
-#endif
-
 #if defined(__HAVE_ARCH_STRLEN) || (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
 static inline size_t __no_sanitize_prefix_strfunc(strlen)(const char *s)
 {
@@ -207,7 +189,6 @@ static inline size_t strnlen(const char * s, size_t n)
 void *memchr(const void * s, int c, size_t n);
 void *memscan(void *s, int c, size_t n);
 char *strcat(char *dst, const char *src);
-char *strcpy(char *dst, const char *src);
 size_t strlen(const char *s);
 size_t strnlen(const char * s, size_t n);
 #endif /* !IN_ARCH_STRING_C */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
deleted file mode 100644
index c61b2cc1a8a8..000000000000
--- a/arch/s390/include/asm/switch_to.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 1999, 2009
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef __ASM_SWITCH_TO_H
-#define __ASM_SWITCH_TO_H
-
-#include <linux/thread_info.h>
-#include <asm/fpu/api.h>
-#include <asm/ptrace.h>
-#include <asm/guarded_storage.h>
-
-extern struct task_struct *__switch_to(void *, void *);
-extern void update_cr_regs(struct task_struct *task);
-
-static inline void save_access_regs(unsigned int *acrs)
-{
-	typedef struct { int _[NUM_ACRS]; } acrstype;
-
-	asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
-}
-
-static inline void restore_access_regs(unsigned int *acrs)
-{
-	typedef struct { int _[NUM_ACRS]; } acrstype;
-
-	asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
-}
-
-#define switch_to(prev, next, last) do {				\
-	/* save_fpu_regs() sets the CIF_FPU flag, which enforces	\
-	 * a restore of the floating point / vector registers as	\
-	 * soon as the next task returns to user space			\
-	 */								\
-	save_fpu_regs();						\
-	save_access_regs(&prev->thread.acrs[0]);			\
-	save_ri_cb(prev->thread.ri_cb);					\
-	save_gs_cb(prev->thread.gs_cb);					\
-	update_cr_regs(next);						\
-	restore_access_regs(&next->thread.acrs[0]);			\
-	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\
-	restore_gs_cb(next->thread.gs_cb);				\
-	prev = __switch_to(prev, next);					\
-} while (0)
-
-#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 27e3d804b311..bd4cb00ccd5e 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -24,6 +24,18 @@ static inline long syscall_get_nr(struct task_struct *task,
 		(regs->int_code & 0xffff) : -1;
 }
 
+static inline void syscall_set_nr(struct task_struct *task,
+				  struct pt_regs *regs,
+				  int nr)
+{
+	/*
+	 * Unlike syscall_get_nr(), syscall_set_nr() can be called only when
+	 * the target task is stopped for tracing on entering syscall, so
+	 * there is no need to have the same check syscall_get_nr() has.
+	 */
+	regs->int_code = (regs->int_code & ~0xffff) | (nr & 0xffff);
+}
+
 static inline void syscall_rollback(struct task_struct *task,
 				    struct pt_regs *regs)
 {
@@ -65,19 +77,26 @@ static inline void syscall_get_arguments(struct task_struct *task,
 					 unsigned long *args)
 {
 	unsigned long mask = -1UL;
-	unsigned int n = 6;
 
 #ifdef CONFIG_COMPAT
 	if (test_tsk_thread_flag(task, TIF_31BIT))
 		mask = 0xffffffff;
 #endif
-	while (n-- > 0)
-		if (n > 0)
-			args[n] = regs->gprs[2 + n] & mask;
+	for (int i = 1; i < 6; i++)
+		args[i] = regs->gprs[2 + i] & mask;
 
 	args[0] = regs->orig_gpr2 & mask;
 }
 
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 const unsigned long *args)
+{
+	regs->orig_gpr2 = args[0];
+	for (int n = 1; n < 6; n++)
+		regs->gprs[2 + n] = args[n];
+}
+
 static inline int syscall_get_arch(struct task_struct *task)
 {
 #ifdef CONFIG_COMPAT
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index fde7e6b1df48..35c1d1b860d8 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -7,36 +7,13 @@
 #ifndef _ASM_S390_SYSCALL_WRAPPER_H
 #define _ASM_S390_SYSCALL_WRAPPER_H
 
-#define __SC_TYPE(t, a) t
-
-#define SYSCALL_PT_ARG6(regs, m, t1, t2, t3, t4, t5, t6)\
-	SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5),	\
-		m(t6, (regs->gprs[7]))
-
-#define SYSCALL_PT_ARG5(regs, m, t1, t2, t3, t4, t5)	\
-	SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4),	\
-		m(t5, (regs->gprs[6]))
-
-#define SYSCALL_PT_ARG4(regs, m, t1, t2, t3, t4)	\
-	SYSCALL_PT_ARG3(regs, m, t1, t2, t3),		\
-		m(t4, (regs->gprs[5]))
-
-#define SYSCALL_PT_ARG3(regs, m, t1, t2, t3)		\
-	SYSCALL_PT_ARG2(regs, m, t1, t2),		\
-		m(t3, (regs->gprs[4]))
-
-#define SYSCALL_PT_ARG2(regs, m, t1, t2)		\
-	SYSCALL_PT_ARG1(regs, m, t1),			\
-		m(t2, (regs->gprs[3]))
-
-#define SYSCALL_PT_ARG1(regs, m, t1)			\
-		m(t1, (regs->orig_gpr2))
-
-#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+/* Mapping of registers to parameters for syscalls */
+#define SC_S390_REGS_TO_ARGS(x, ...)					\
+	__MAP(x, __SC_ARGS						\
+	      ,, regs->orig_gpr2,, regs->gprs[3],, regs->gprs[4]	\
+	      ,, regs->gprs[5],, regs->gprs[6],, regs->gprs[7])
 
 #ifdef CONFIG_COMPAT
-#define __SC_COMPAT_TYPE(t, a) \
-	__typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
 
 #define __SC_COMPAT_CAST(t, a)						\
 ({									\
@@ -56,110 +33,108 @@
 	(t)__ReS;							\
 })
 
-#define __S390_SYS_STUBx(x, name, ...)						\
-	long __s390_sys##name(struct pt_regs *regs);				\
-	ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO);				\
-	long __s390_sys##name(struct pt_regs *regs)				\
-	{									\
-		long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs,		\
-			__SC_COMPAT_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__)));	\
-		__MAP(x,__SC_TEST,__VA_ARGS__);					\
-		return ret;							\
-	}
-
 /*
  * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
  * named __s390x_sys_*()
  */
 #define COMPAT_SYSCALL_DEFINE0(sname)					\
-	SYSCALL_METADATA(_##sname, 0);					\
 	long __s390_compat_sys_##sname(void);				\
 	ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO);	\
 	long __s390_compat_sys_##sname(void)
 
 #define SYSCALL_DEFINE0(sname)						\
 	SYSCALL_METADATA(_##sname, 0);					\
+	long __s390_sys_##sname(void);					\
+	ALLOW_ERROR_INJECTION(__s390_sys_##sname, ERRNO);		\
 	long __s390x_sys_##sname(void);					\
 	ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO);		\
+	static inline long __do_sys_##sname(void);			\
 	long __s390_sys_##sname(void)					\
-		__attribute__((alias(__stringify(__s390x_sys_##sname)))); \
-	long __s390x_sys_##sname(void)
+	{								\
+		return __do_sys_##sname();				\
+	}								\
+	long __s390x_sys_##sname(void)					\
+	{								\
+		return __do_sys_##sname();				\
+	}								\
+	static inline long __do_sys_##sname(void)
 
 #define COND_SYSCALL(name)						\
 	cond_syscall(__s390x_sys_##name);				\
 	cond_syscall(__s390_sys_##name)
 
-#define SYS_NI(name)							\
-	SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers);		\
-	SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers)
-
 #define COMPAT_SYSCALL_DEFINEx(x, name, ...)						\
-	__diag_push();									\
-	__diag_ignore(GCC, 8, "-Wattribute-alias",					\
-		      "Type aliasing is used to sanitize syscall arguments");		\
 	long __s390_compat_sys##name(struct pt_regs *regs);				\
-	long __s390_compat_sys##name(struct pt_regs *regs)				\
-		__attribute__((alias(__stringify(__se_compat_sys##name))));		\
 	ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO);				\
-	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
-	long __se_compat_sys##name(struct pt_regs *regs);				\
-	long __se_compat_sys##name(struct pt_regs *regs)				\
+	static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__));	\
+	static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__));	\
+	long __s390_compat_sys##name(struct pt_regs *regs)				\
+	{										\
+		return __se_compat_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__));	\
+	}										\
+	static inline long __se_compat_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__))	\
 	{										\
-		long ret = __do_compat_sys##name(SYSCALL_PT_ARGS(x, regs, __SC_DELOUSE,	\
-						 __MAP(x, __SC_TYPE, __VA_ARGS__)));	\
-		__MAP(x,__SC_TEST,__VA_ARGS__);						\
-		return ret;								\
+		__MAP(x, __SC_TEST, __VA_ARGS__);					\
+		return __do_compat_sys##name(__MAP(x, __SC_DELOUSE, __VA_ARGS__));	\
 	}										\
-	__diag_pop();									\
-	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+	static inline long __do_compat_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
 
 /*
  * As some compat syscalls may not be implemented, we need to expand
- * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
- * kernel/time/posix-stubs.c to cover this case as well.
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well.
  */
 #define COND_SYSCALL_COMPAT(name)					\
 	cond_syscall(__s390_compat_sys_##name)
 
-#define COMPAT_SYS_NI(name)						\
-	SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers)
+#define __S390_SYS_STUBx(x, name, ...)						\
+	long __s390_sys##name(struct pt_regs *regs);				\
+	ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO);				\
+	static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__));	\
+	long __s390_sys##name(struct pt_regs *regs)				\
+	{									\
+		return ___se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__));	\
+	}									\
+	static inline long ___se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__))	\
+	{									\
+		__MAP(x, __SC_TEST, __VA_ARGS__);				\
+		return __do_sys##name(__MAP(x, __SC_COMPAT_CAST, __VA_ARGS__));	\
+	}
 
 #else /* CONFIG_COMPAT */
 
-#define __S390_SYS_STUBx(x, fullname, name, ...)
-
 #define SYSCALL_DEFINE0(sname)						\
 	SYSCALL_METADATA(_##sname, 0);					\
 	long __s390x_sys_##sname(void);					\
 	ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO);		\
-	long __s390x_sys_##sname(void)
+	static inline long __do_sys_##sname(void);			\
+	long __s390x_sys_##sname(void)					\
+	{								\
+		return __do_sys_##sname();				\
+	}								\
+	static inline long __do_sys_##sname(void)
 
 #define COND_SYSCALL(name)						\
 	cond_syscall(__s390x_sys_##name)
 
-#define SYS_NI(name)							\
-	SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers);
+#define __S390_SYS_STUBx(x, fullname, name, ...)
 
 #endif /* CONFIG_COMPAT */
 
-#define __SYSCALL_DEFINEx(x, name, ...)							\
-	__diag_push();									\
-	__diag_ignore(GCC, 8, "-Wattribute-alias",					\
-		      "Type aliasing is used to sanitize syscall arguments");		\
-	long __s390x_sys##name(struct pt_regs *regs)					\
-		__attribute__((alias(__stringify(__se_sys##name))));			\
-	ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO);				\
-	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));		\
-	long __se_sys##name(struct pt_regs *regs);					\
-	__S390_SYS_STUBx(x, name, __VA_ARGS__)						\
-	long __se_sys##name(struct pt_regs *regs)					\
-	{										\
-		long ret = __do_sys##name(SYSCALL_PT_ARGS(x, regs,			\
-				    __SC_CAST, __MAP(x, __SC_TYPE, __VA_ARGS__)));	\
-		__MAP(x,__SC_TEST,__VA_ARGS__);						\
-		return ret;								\
-	}										\
-	__diag_pop();									\
-	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#define __SYSCALL_DEFINEx(x, name, ...)						\
+	long __s390x_sys##name(struct pt_regs *regs);				\
+	ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO);			\
+	static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__));	\
+	static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__));	\
+	__S390_SYS_STUBx(x, name, __VA_ARGS__);					\
+	long __s390x_sys##name(struct pt_regs *regs)				\
+	{									\
+		return __se_sys##name(SC_S390_REGS_TO_ARGS(x, __VA_ARGS__));	\
+	}									\
+	static inline long __se_sys##name(__MAP(x, __SC_LONG, __VA_ARGS__))	\
+	{									\
+		__MAP(x, __SC_TEST, __VA_ARGS__);				\
+		return __do_sys##name(__MAP(x, __SC_CAST, __VA_ARGS__));	\
+	}									\
+	static inline long __do_sys##name(__MAP(x, __SC_DECL, __VA_ARGS__))
 
 #endif /* _ASM_S390_SYSCALL_WRAPPER_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index ab1c6316055c..9088c5267f35 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -11,8 +11,34 @@
 #ifndef __ASM_S390_SYSINFO_H
 #define __ASM_S390_SYSINFO_H
 
-#include <asm/bitsperlong.h>
 #include <linux/uuid.h>
+#include <asm/bitsperlong.h>
+#include <asm/asm.h>
+
+/*
+ * stsi - store system information
+ *
+ * Returns -EOPNOTSUPP if condition code 3 is set. Otherwise returns the
+ * current configuration level for function code 0, and 0 for any other.
+ */
+static inline int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	int r0 = (fc << 28) | sel1;
+	int cc;
+
+	asm volatile(
+		"	lr	%%r0,%[r0]\n"
+		"	lr	%%r1,%[r1]\n"
+		"	stsi	%[sysinfo]\n"
+		"	lr	%[r0],%%r0\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [r0] "+d" (r0), [sysinfo] "=Q" (*(char *)sysinfo)
+		: [r1] "d" (sel2)
+		: CC_CLOBBER_LIST("0", "1", "memory"));
+	if (cc == 3)
+		return -EOPNOTSUPP;
+	return fc ? 0 : (unsigned int)r0 >> 28;
+}
 
 struct sysinfo_1_1_1 {
 	unsigned char p:1;
@@ -40,6 +66,10 @@ struct sysinfo_1_1_1 {
 	unsigned int ncr;
 	unsigned int npr;
 	unsigned int ntr;
+	char reserved_3[4];
+	char model_var_cap[16];
+	unsigned int model_var_cap_rating;
+	unsigned int nvr;
 };
 
 struct sysinfo_1_2_1 {
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
deleted file mode 100644
index 46fa3020b41e..000000000000
--- a/arch/s390/include/asm/termios.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/termios.h"
- */
-#ifndef _S390_TERMIOS_H
-#define _S390_TERMIOS_H
-
-#include <uapi/asm/termios.h>
-
-
-/*	intr=^C		quit=^\		erase=del	kill=^U
-	eof=^D		vtime=\0	vmin=\1		sxtc=\0
-	start=^Q	stop=^S		susp=^Z		eol=\0
-	reprint=^R	discard=^U	werase=^W	lnext=^V
-	eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-
-#include <asm-generic/termios-base.h>
-
-#endif	/* _S390_TERMIOS_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index b2ffcb4fe000..391eb04d26d8 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -9,11 +9,12 @@
 #define _ASM_THREAD_INFO_H
 
 #include <linux/bits.h>
+#include <vdso/page.h>
 
 /*
  * General size of kernel stacks
  */
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) || defined(CONFIG_KMSAN)
 #define THREAD_SIZE_ORDER 4
 #else
 #define THREAD_SIZE_ORDER 2
@@ -21,12 +22,9 @@
 #define BOOT_STACK_SIZE (PAGE_SIZE << 2)
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 
-#ifndef __ASSEMBLY__
-#include <asm/lowcore.h>
-#include <asm/page.h>
+#define STACK_INIT_OFFSET (THREAD_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
 
-#define STACK_INIT_OFFSET \
-	(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
+#ifndef __ASSEMBLY__
 
 /*
  * low level task data that entry.S needs immediate access to
@@ -38,6 +36,7 @@ struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
 	unsigned int		cpu;		/* current CPU */
+	unsigned char		sie;		/* running in SIE context */
 };
 
 /*
@@ -50,9 +49,6 @@ struct thread_info {
 
 struct task_struct;
 
-void arch_release_task_struct(struct task_struct *tsk);
-int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-
 void arch_setup_new_exec(void);
 #define arch_setup_new_exec arch_setup_new_exec
 
@@ -61,46 +57,45 @@ void arch_setup_new_exec(void);
 /*
  * thread information flags bit numbers
  */
-/* _TIF_WORK bits */
 #define TIF_NOTIFY_RESUME	0	/* callback before returning to user */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_UPROBE		3	/* breakpointed or single-stepping */
-#define TIF_GUARDED_STORAGE	4	/* load guarded storage control block */
+#define TIF_NEED_RESCHED_LAZY	3	/* lazy rescheduling needed */
+#define TIF_UPROBE		4	/* breakpointed or single-stepping */
 #define TIF_PATCH_PENDING	5	/* pending live patching update */
-#define TIF_PGSTE		6	/* New mm's will use 4K page tables */
+#define TIF_ASCE_PRIMARY	6	/* primary asce is kernel asce */
 #define TIF_NOTIFY_SIGNAL	7	/* signal notifications exist */
-#define TIF_ISOLATE_BP		8	/* Run process with isolated BP */
+#define TIF_GUARDED_STORAGE	8	/* load guarded storage control block */
 #define TIF_ISOLATE_BP_GUEST	9	/* Run KVM guests with isolated BP */
 #define TIF_PER_TRAP		10	/* Need to handle PER trap on exit to usermode */
-
 #define TIF_31BIT		16	/* 32bit process */
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal() */
 #define TIF_SINGLE_STEP		19	/* This task is single stepped */
 #define TIF_BLOCK_STEP		20	/* This task is block stepped */
 #define TIF_UPROBE_SINGLESTEP	21	/* This task is uprobe single stepped */
-
-/* _TIF_TRACE bits */
 #define TIF_SYSCALL_TRACE	24	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	25	/* syscall auditing active */
 #define TIF_SECCOMP		26	/* secure computing */
 #define TIF_SYSCALL_TRACEPOINT	27	/* syscall tracepoint instrumentation */
 
 #define _TIF_NOTIFY_RESUME	BIT(TIF_NOTIFY_RESUME)
-#define _TIF_NOTIFY_SIGNAL	BIT(TIF_NOTIFY_SIGNAL)
 #define _TIF_SIGPENDING		BIT(TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	BIT(TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY	BIT(TIF_NEED_RESCHED_LAZY)
 #define _TIF_UPROBE		BIT(TIF_UPROBE)
-#define _TIF_GUARDED_STORAGE	BIT(TIF_GUARDED_STORAGE)
 #define _TIF_PATCH_PENDING	BIT(TIF_PATCH_PENDING)
-#define _TIF_ISOLATE_BP		BIT(TIF_ISOLATE_BP)
+#define _TIF_ASCE_PRIMARY	BIT(TIF_ASCE_PRIMARY)
+#define _TIF_NOTIFY_SIGNAL	BIT(TIF_NOTIFY_SIGNAL)
+#define _TIF_GUARDED_STORAGE	BIT(TIF_GUARDED_STORAGE)
 #define _TIF_ISOLATE_BP_GUEST	BIT(TIF_ISOLATE_BP_GUEST)
 #define _TIF_PER_TRAP		BIT(TIF_PER_TRAP)
-
 #define _TIF_31BIT		BIT(TIF_31BIT)
+#define _TIF_MEMDIE		BIT(TIF_MEMDIE)
+#define _TIF_RESTORE_SIGMASK	BIT(TIF_RESTORE_SIGMASK)
 #define _TIF_SINGLE_STEP	BIT(TIF_SINGLE_STEP)
-
+#define _TIF_BLOCK_STEP		BIT(TIF_BLOCK_STEP)
+#define _TIF_UPROBE_SINGLESTEP	BIT(TIF_UPROBE_SINGLESTEP)
 #define _TIF_SYSCALL_TRACE	BIT(TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	BIT(TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		BIT(TIF_SECCOMP)
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index ce878e85b6e4..bed8d0b5a282 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -13,6 +13,8 @@
 #include <linux/preempt.h>
 #include <linux/time64.h>
 #include <asm/lowcore.h>
+#include <asm/machine.h>
+#include <asm/asm.h>
 
 /* The value of the TOD clock for 1.1.1970. */
 #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -44,11 +46,12 @@ static inline int set_tod_clock(__u64 time)
 	int cc;
 
 	asm volatile(
-		"   sck   %1\n"
-		"   ipm   %0\n"
-		"   srl   %0,28\n"
-		: "=d" (cc) : "Q" (time) : "cc");
-	return cc;
+		"	sck	%[time]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		: [time] "Q" (time)
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
 }
 
 static inline int store_tod_clock_ext_cc(union tod_clock *clk)
@@ -56,14 +59,15 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk)
 	int cc;
 
 	asm volatile(
-		"   stcke  %1\n"
-		"   ipm   %0\n"
-		"   srl   %0,28\n"
-		: "=d" (cc), "=Q" (*clk) : : "cc");
-	return cc;
+		"	stcke	%[clk]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [clk] "=Q" (*clk)
+		:
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
 }
 
-static inline void store_tod_clock_ext(union tod_clock *tod)
+static __always_inline void store_tod_clock_ext(union tod_clock *tod)
 {
 	asm volatile("stcke %0" : "=Q" (*tod) : : "cc");
 }
@@ -93,6 +97,7 @@ extern unsigned char ptff_function_mask[16];
 #define PTFF_QAF	0x00	/* query available functions */
 #define PTFF_QTO	0x01	/* query tod offset */
 #define PTFF_QSI	0x02	/* query steering information */
+#define PTFF_QPT	0x03	/* query physical clock */
 #define PTFF_QUI	0x04	/* query UTC information */
 #define PTFF_ATO	0x40	/* adjust tod offset */
 #define PTFF_STO	0x41	/* set tod offset */
@@ -149,35 +154,34 @@ struct ptff_qui {
 		"	lgr	0,%[reg0]\n"				\
 		"	lgr	1,%[reg1]\n"				\
 		"	ptff\n"						\
-		"	ipm	%[rc]\n"				\
-		"	srl	%[rc],28\n"				\
-		: [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1)	\
+		CC_IPM(rc)						\
+		: CC_OUT(rc, rc), "+m" (*(struct addrtype *)reg1)	\
 		: [reg0] "d" (reg0), [reg1] "d" (reg1)			\
-		: "cc", "0", "1");					\
-	rc;								\
+		: CC_CLOBBER_LIST("0", "1"));				\
+	CC_TRANSFORM(rc);						\
 })
 
 static inline unsigned long local_tick_disable(void)
 {
 	unsigned long old;
 
-	old = S390_lowcore.clock_comparator;
-	S390_lowcore.clock_comparator = clock_comparator_max;
-	set_clock_comparator(S390_lowcore.clock_comparator);
+	old = get_lowcore()->clock_comparator;
+	get_lowcore()->clock_comparator = clock_comparator_max;
+	set_clock_comparator(get_lowcore()->clock_comparator);
 	return old;
 }
 
 static inline void local_tick_enable(unsigned long comp)
 {
-	S390_lowcore.clock_comparator = comp;
-	set_clock_comparator(S390_lowcore.clock_comparator);
+	get_lowcore()->clock_comparator = comp;
+	set_clock_comparator(get_lowcore()->clock_comparator);
 }
 
 #define CLOCK_TICK_RATE		1193180 /* Underlying HZ */
 
 typedef unsigned long cycles_t;
 
-static inline unsigned long get_tod_clock(void)
+static __always_inline unsigned long get_tod_clock(void)
 {
 	union tod_clock clk;
 
@@ -204,6 +208,11 @@ void init_cpu_timer(void);
 
 extern union tod_clock tod_clock_base;
 
+static __always_inline unsigned long __get_tod_clock_monotonic(void)
+{
+	return get_tod_clock() - tod_clock_base.tod;
+}
+
 /**
  * get_clock_monotonic - returns current time in clock rate units
  *
@@ -216,7 +225,7 @@ static inline unsigned long get_tod_clock_monotonic(void)
 	unsigned long tod;
 
 	preempt_disable_notrace();
-	tod = get_tod_clock() - tod_clock_base.tod;
+	tod = __get_tod_clock_monotonic();
 	preempt_enable_notrace();
 	return tod;
 }
@@ -240,11 +249,16 @@ static inline unsigned long get_tod_clock_monotonic(void)
  * -> ns = (th * 125) + ((tl * 125) >> 9);
  *
  */
-static inline unsigned long tod_to_ns(unsigned long todval)
+static __always_inline unsigned long tod_to_ns(unsigned long todval)
 {
 	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
 
+static __always_inline u128 eitod_to_ns(u128 todval)
+{
+	return (todval * 125) >> 9;
+}
+
 /**
  * tod_after - compare two 64 bit TOD values
  * @a: first 64 bit TOD timestamp
@@ -254,7 +268,7 @@ static inline unsigned long tod_to_ns(unsigned long todval)
  */
 static inline int tod_after(unsigned long a, unsigned long b)
 {
-	if (MACHINE_HAS_SCC)
+	if (machine_has_scc())
 		return (long) a > (long) b;
 	return a > b;
 }
@@ -268,7 +282,7 @@ static inline int tod_after(unsigned long a, unsigned long b)
  */
 static inline int tod_after_eq(unsigned long a, unsigned long b)
 {
-	if (MACHINE_HAS_SCC)
+	if (machine_has_scc())
 		return (long) a >= (long) b;
 	return a >= b;
 }
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 3a5c8fb590e5..1e50f6f1ad9d 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -22,10 +22,11 @@
  * Pages used for the page tables is a different story. FIXME: more
  */
 
-void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size);
+		struct page *page, bool delay_rmap, int page_size);
+static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
+		struct page *page, unsigned int nr_pages, bool delay_rmap);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -35,16 +36,36 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
+#include <asm/gmap.h>
 
 /*
  * Release the page cache reference for a pte removed by
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
- * has already been freed, so just do free_page_and_swap_cache.
+ * has already been freed, so just do free_folio_and_swap_cache.
+ *
+ * s390 doesn't delay rmap removal.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size)
+		struct page *page, bool delay_rmap, int page_size)
+{
+	VM_WARN_ON_ONCE(delay_rmap);
+
+	free_folio_and_swap_cache(page_folio(page));
+	return false;
+}
+
+static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
+		struct page *page, unsigned int nr_pages, bool delay_rmap)
 {
-	free_page_and_swap_cache(page);
+	struct encoded_page *encoded_pages[] = {
+		encode_page(page, ENCODED_PAGE_BIT_NR_PAGES_NEXT),
+		encode_nr_pages(nr_pages),
+	};
+
+	VM_WARN_ON_ONCE(delay_rmap);
+	VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
+
+	free_pages_and_swap_cache(encoded_pages, ARRAY_SIZE(encoded_pages));
 	return false;
 }
 
@@ -64,12 +85,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_pmds = 1;
-	/*
-	 * page_table_free_rcu takes care of the allocation bit masks
-	 * of the 2K table fragments in the 4K page table page,
-	 * then calls tlb_remove_table.
-	 */
-	page_table_free_rcu(tlb, (unsigned long *) pte, address);
+	if (mm_has_pgste(tlb->mm))
+		gmap_unlink(tlb->mm, (unsigned long *)pte, address);
+	tlb_remove_ptdesc(tlb, virt_to_ptdesc(pte));
 }
 
 /*
@@ -84,12 +102,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 {
 	if (mm_pmd_folded(tlb->mm))
 		return;
-	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	__tlb_adjust_range(tlb, address, PAGE_SIZE);
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_puds = 1;
-	tlb_remove_table(tlb, pmd);
+	tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
 }
 
 /*
@@ -107,7 +124,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 	__tlb_adjust_range(tlb, address, PAGE_SIZE);
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
-	tlb_remove_table(tlb, p4d);
+	tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
 }
 
 /*
@@ -122,11 +139,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 {
 	if (mm_pud_folded(tlb->mm))
 		return;
+	__tlb_adjust_range(tlb, address, PAGE_SIZE);
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_p4ds = 1;
-	tlb_remove_table(tlb, pud);
+	tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud));
 }
 
-
 #endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index a6e2cd89b609..75491baa2197 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -2,9 +2,11 @@
 #ifndef _S390_TLBFLUSH_H
 #define _S390_TLBFLUSH_H
 
+#include <linux/cpufeature.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
+#include <asm/machine.h>
 
 /*
  * Flush all TLB entries on the local CPU.
@@ -22,7 +24,7 @@ static inline void __tlb_flush_idte(unsigned long asce)
 	unsigned long opt;
 
 	opt = IDTE_PTOA;
-	if (MACHINE_HAS_TLB_GUEST)
+	if (machine_has_tlb_guest())
 		opt |= IDTE_GUEST_ASCE;
 	/* Global TLB flush for the mm */
 	asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc");
@@ -46,18 +48,13 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
 	unsigned long gmap_asce;
 
-	/*
-	 * If the machine has IDTE we prefer to do a per mm flush
-	 * on all cpus instead of doing a local flush if the mm
-	 * only ran on the local cpu.
-	 */
 	preempt_disable();
 	atomic_inc(&mm->context.flush_count);
 	/* Reset TLB flush mask */
 	cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	barrier();
 	gmap_asce = READ_ONCE(mm->context.gmap_asce);
-	if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+	if (cpu_has_idte() && gmap_asce != -1UL) {
 		if (gmap_asce)
 			__tlb_flush_idte(gmap_asce);
 		__tlb_flush_idte(mm->context.asce);
@@ -71,7 +68,7 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
 
 static inline void __tlb_flush_kernel(void)
 {
-	if (MACHINE_HAS_IDTE)
+	if (cpu_has_idte())
 		__tlb_flush_idte(init_mm.context.asce);
 	else
 		__tlb_flush_global();
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 3a0ac0c7a9a3..44110847342a 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -61,12 +61,21 @@ static inline void topology_expect_change(void) { }
 
 #endif /* CONFIG_SCHED_TOPOLOGY */
 
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+	return smp_get_base_cpu(cpu) == cpu;
+}
+#define topology_is_primary_thread topology_is_primary_thread
+
 #define POLARIZATION_UNKNOWN	(-1)
 #define POLARIZATION_HRZ	(0)
 #define POLARIZATION_VL		(1)
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
+#define CPU_CAPACITY_HIGH	SCHED_CAPACITY_SCALE
+#define CPU_CAPACITY_LOW	(SCHED_CAPACITY_SCALE >> 3)
+
 #define SD_BOOK_INIT	SD_CPU_INIT
 
 #ifdef CONFIG_NUMA
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index 1ac538b8cbf5..f76e5fdff23a 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -19,6 +19,19 @@ struct tpi_info {
 	u32 :12;
 } __packed __aligned(4);
 
+/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
+struct tpi_adapter_info {
+	u32 aism:8;
+	u32 :22;
+	u32 error:1;
+	u32 forward:1;
+	u32 reserved;
+	u32 adapter_IO:1;
+	u32 directed_irq:1;
+	u32 isc:3;
+	u32 :27;
+} __packed __aligned(4);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/trace/hiperdispatch.h b/arch/s390/include/asm/trace/hiperdispatch.h
new file mode 100644
index 000000000000..46462ee645b0
--- /dev/null
+++ b/arch/s390/include/asm/trace/hiperdispatch.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Tracepoint header for hiperdispatch
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_HIPERDISPATCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_HIPERDISPATCH_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE hiperdispatch
+
+TRACE_EVENT(s390_hd_work_fn,
+	    TP_PROTO(int steal_time_percentage,
+		     int entitled_core_count,
+		     int highcap_core_count),
+	    TP_ARGS(steal_time_percentage,
+		    entitled_core_count,
+		    highcap_core_count),
+	    TP_STRUCT__entry(__field(int, steal_time_percentage)
+			     __field(int, entitled_core_count)
+			     __field(int, highcap_core_count)),
+	    TP_fast_assign(__entry->steal_time_percentage = steal_time_percentage;
+			   __entry->entitled_core_count = entitled_core_count;
+			   __entry->highcap_core_count = highcap_core_count;),
+	    TP_printk("steal: %d entitled_core_count: %d highcap_core_count: %d",
+		      __entry->steal_time_percentage,
+		      __entry->entitled_core_count,
+		      __entry->highcap_core_count)
+);
+
+TRACE_EVENT(s390_hd_rebuild_domains,
+	    TP_PROTO(int current_highcap_core_count,
+		     int new_highcap_core_count),
+	    TP_ARGS(current_highcap_core_count,
+		    new_highcap_core_count),
+	    TP_STRUCT__entry(__field(int, current_highcap_core_count)
+			     __field(int, new_highcap_core_count)),
+	    TP_fast_assign(__entry->current_highcap_core_count = current_highcap_core_count;
+			   __entry->new_highcap_core_count = new_highcap_core_count),
+	    TP_printk("change highcap_core_count: %d -> %d",
+		      __entry->current_highcap_core_count,
+		      __entry->new_highcap_core_count)
+);
+
+#endif /* _TRACE_S390_HIPERDISPATCH_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index c2c9995466e0..a43fc88c0050 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -13,25 +13,81 @@
 /*
  * User space memory access functions
  */
+#include <linux/pgtable.h>
 #include <asm/asm-extable.h>
 #include <asm/processor.h>
-#include <asm/ctl_reg.h>
 #include <asm/extable.h>
 #include <asm/facility.h>
 #include <asm-generic/access_ok.h>
+#include <asm/asce.h>
+#include <linux/instrumented.h>
 
 void debug_user_asce(int exit);
 
-unsigned long __must_check
-raw_copy_from_user(void *to, const void __user *from, unsigned long n);
-
-unsigned long __must_check
-raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+#ifdef CONFIG_KMSAN
+#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory
+#else
+#define uaccess_kmsan_or_inline __always_inline
+#endif
 
-#ifndef CONFIG_KASAN
 #define INLINE_COPY_FROM_USER
 #define INLINE_COPY_TO_USER
-#endif
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user *from, unsigned long size)
+{
+	unsigned long osize;
+	int cc;
+
+	while (1) {
+		osize = size;
+		asm_inline volatile(
+			"	lhi	%%r0,%[spec]\n"
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_FROM(0b, 0b)
+			EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+			: [spec] "I" (0x81), [from] "Q" (*(const char __user *)from)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (__builtin_constant_p(osize) && osize <= 4096)
+			return osize - size;
+		if (likely(CC_TRANSFORM(cc) == 0))
+			return osize - size;
+		size -= 4096;
+		to += 4096;
+		from += 4096;
+	}
+}
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long size)
+{
+	unsigned long osize;
+	int cc;
+
+	while (1) {
+		osize = size;
+		asm_inline volatile(
+			"	llilh	%%r0,%[spec]\n"
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_TO(0b, 0b)
+			EX_TABLE_UA_MVCOS_TO(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+			: [spec] "I" (0x81), [from] "Q" (*(const char *)from)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (__builtin_constant_p(osize) && osize <= 4096)
+			return osize - size;
+		if (likely(CC_TRANSFORM(cc) == 0))
+			return osize - size;
+		size -= 4096;
+		to += 4096;
+		from += 4096;
+	}
+}
 
 unsigned long __must_check
 _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
@@ -55,163 +111,113 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo
 	return n;
 }
 
-union oac {
-	unsigned int val;
-	struct {
-		struct {
-			unsigned short key : 4;
-			unsigned short	   : 4;
-			unsigned short as  : 2;
-			unsigned short	   : 4;
-			unsigned short k   : 1;
-			unsigned short a   : 1;
-		} oac1;
-		struct {
-			unsigned short key : 4;
-			unsigned short	   : 4;
-			unsigned short as  : 2;
-			unsigned short	   : 4;
-			unsigned short k   : 1;
-			unsigned short a   : 1;
-		} oac2;
-	};
-};
-
 int __noreturn __put_user_bad(void);
 
-#define __put_user_asm(to, from, size)					\
-({									\
-	union oac __oac_spec = {					\
-		.oac1.as = PSW_BITS_AS_SECONDARY,			\
-		.oac1.a = 1,						\
-	};								\
-	int __rc;							\
-									\
-	asm volatile(							\
-		"	lr	0,%[spec]\n"				\
-		"0:	mvcos	%[_to],%[_from],%[_size]\n"		\
-		"1:	xr	%[rc],%[rc]\n"				\
-		"2:\n"							\
-		EX_TABLE_UA_STORE(0b, 2b, %[rc])			\
-		EX_TABLE_UA_STORE(1b, 2b, %[rc])			\
-		: [rc] "=&d" (__rc), [_to] "+Q" (*(to))			\
-		: [_size] "d" (size), [_from] "Q" (*(from)),		\
-		  [spec] "d" (__oac_spec.val)				\
-		: "cc", "0");						\
-	__rc;								\
-})
-
-static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
-{
-	int rc;
-
-	switch (size) {
-	case 1:
-		rc = __put_user_asm((unsigned char __user *)ptr,
-				    (unsigned char *)x,
-				    size);
-		break;
-	case 2:
-		rc = __put_user_asm((unsigned short __user *)ptr,
-				    (unsigned short *)x,
-				    size);
-		break;
-	case 4:
-		rc = __put_user_asm((unsigned int __user *)ptr,
-				    (unsigned int *)x,
-				    size);
-		break;
-	case 8:
-		rc = __put_user_asm((unsigned long __user *)ptr,
-				    (unsigned long *)x,
-				    size);
-		break;
-	default:
-		__put_user_bad();
-		break;
-	}
-	return rc;
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_PUT_USER_NOINSTR(type)					\
+static uaccess_kmsan_or_inline int					\
+__put_user_##type##_noinstr(unsigned type __user *to,			\
+			    unsigned type *from,			\
+			    unsigned long size)				\
+{									\
+	asm goto(							\
+		"	llilh	%%r0,%[spec]\n"				\
+		"0:	mvcos	%[to],%[from],%[size]\n"		\
+		"1:	nopr	%%r7\n"					\
+		EX_TABLE(0b, %l[Efault])				\
+		EX_TABLE(1b, %l[Efault])				\
+		: [to] "+Q" (*to)					\
+		: [size] "d" (size), [from] "Q" (*from),		\
+		  [spec] "I" (0x81)					\
+		: "cc", "0"						\
+		: Efault						\
+		);							\
+	return 0;							\
+Efault:									\
+	return -EFAULT;							\
 }
 
-int __noreturn __get_user_bad(void);
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
 
-#define __get_user_asm(to, from, size)					\
-({									\
-	union oac __oac_spec = {					\
-		.oac2.as = PSW_BITS_AS_SECONDARY,			\
-		.oac2.a = 1,						\
-	};								\
-	int __rc;							\
+#define DEFINE_PUT_USER_NOINSTR(type)					\
+static uaccess_kmsan_or_inline int					\
+__put_user_##type##_noinstr(unsigned type __user *to,			\
+			    unsigned type *from,			\
+			    unsigned long size)				\
+{									\
+	int rc;								\
 									\
-	asm volatile(							\
-		"	lr	0,%[spec]\n"				\
-		"0:	mvcos	0(%[_to]),%[_from],%[_size]\n"		\
-		"1:	xr	%[rc],%[rc]\n"				\
+	asm_inline volatile(						\
+		"	llilh	%%r0,%[spec]\n"				\
+		"0:	mvcos	%[to],%[from],%[size]\n"		\
+		"1:	lhi	%[rc],0\n"				\
 		"2:\n"							\
-		EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize])	\
-		EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize])	\
-		: [rc] "=&d" (__rc), "=Q" (*(to))			\
-		: [_size] "d" (size), [_from] "Q" (*(from)),		\
-		  [spec] "d" (__oac_spec.val), [_to] "a" (to),		\
-		  [_ksize] "K" (size)					\
+		EX_TABLE_UA_FAULT(0b, 2b, %[rc])			\
+		EX_TABLE_UA_FAULT(1b, 2b, %[rc])			\
+		: [rc] "=d" (rc), [to] "+Q" (*to)			\
+		: [size] "d" (size), [from] "Q" (*from),		\
+		  [spec] "I" (0x81)					\
 		: "cc", "0");						\
-	__rc;								\
-})
+	return rc;							\
+}
 
-static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
-{
-	int rc;
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
 
-	switch (size) {
-	case 1:
-		rc = __get_user_asm((unsigned char *)x,
-				    (unsigned char __user *)ptr,
-				    size);
-		break;
-	case 2:
-		rc = __get_user_asm((unsigned short *)x,
-				    (unsigned short __user *)ptr,
-				    size);
-		break;
-	case 4:
-		rc = __get_user_asm((unsigned int *)x,
-				    (unsigned int __user *)ptr,
-				    size);
-		break;
-	case 8:
-		rc = __get_user_asm((unsigned long *)x,
-				    (unsigned long __user *)ptr,
-				    size);
-		break;
-	default:
-		__get_user_bad();
-		break;
-	}
-	return rc;
+DEFINE_PUT_USER_NOINSTR(char);
+DEFINE_PUT_USER_NOINSTR(short);
+DEFINE_PUT_USER_NOINSTR(int);
+DEFINE_PUT_USER_NOINSTR(long);
+
+#define DEFINE_PUT_USER(type)						\
+static __always_inline int						\
+__put_user_##type(unsigned type __user *to, unsigned type *from,	\
+		  unsigned long size)					\
+{									\
+	int rc;								\
+									\
+	rc = __put_user_##type##_noinstr(to, from, size);		\
+	instrument_put_user(*from, to, size);				\
+	return rc;							\
 }
 
-/*
- * These are the main single-value transfer routines.  They automatically
- * use the right size if we just have the right pointer type.
- */
+DEFINE_PUT_USER(char);
+DEFINE_PUT_USER(short);
+DEFINE_PUT_USER(int);
+DEFINE_PUT_USER(long);
+
 #define __put_user(x, ptr)						\
 ({									\
 	__typeof__(*(ptr)) __x = (x);					\
-	int __pu_err = -EFAULT;						\
+	int __prc;							\
 									\
 	__chk_user_ptr(ptr);						\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
+		__prc = __put_user_char((unsigned char __user *)(ptr),	\
+					(unsigned char *)&__x,		\
+					sizeof(*(ptr)));		\
+		break;							\
 	case 2:								\
+		__prc = __put_user_short((unsigned short __user *)(ptr),\
+					 (unsigned short *)&__x,	\
+					 sizeof(*(ptr)));		\
+		break;							\
 	case 4:								\
+		__prc = __put_user_int((unsigned int __user *)(ptr),	\
+				       (unsigned int *)&__x,		\
+				       sizeof(*(ptr)));			\
+		break;							\
 	case 8:								\
-		__pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		__prc = __put_user_long((unsigned long __user *)(ptr),	\
+					(unsigned long *)&__x,		\
+					sizeof(*(ptr)));		\
 		break;							\
 	default:							\
-		__put_user_bad();					\
+		__prc = __put_user_bad();				\
 		break;							\
 	}								\
-	__builtin_expect(__pu_err, 0);					\
+	__builtin_expect(__prc, 0);					\
 })
 
 #define put_user(x, ptr)						\
@@ -220,45 +226,129 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
 	__put_user(x, ptr);						\
 })
 
+int __noreturn __get_user_bad(void);
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#define DEFINE_GET_USER_NOINSTR(type)					\
+static uaccess_kmsan_or_inline int					\
+__get_user_##type##_noinstr(unsigned type *to,				\
+			    const unsigned type __user *from,		\
+			    unsigned long size)				\
+{									\
+	asm goto(							\
+		"	lhi	%%r0,%[spec]\n"				\
+		"0:	mvcos	%[to],%[from],%[size]\n"		\
+		"1:	nopr	%%r7\n"					\
+		EX_TABLE(0b, %l[Efault])				\
+		EX_TABLE(1b, %l[Efault])				\
+		: [to] "=Q" (*to)					\
+		: [size] "d" (size), [from] "Q" (*from),		\
+		  [spec] "I" (0x81)					\
+		: "cc", "0"						\
+		: Efault						\
+		);							\
+	return 0;							\
+Efault:									\
+	*to = 0;							\
+	return -EFAULT;							\
+}
+
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+#define DEFINE_GET_USER_NOINSTR(type)					\
+static uaccess_kmsan_or_inline int					\
+__get_user_##type##_noinstr(unsigned type *to,				\
+			    const unsigned type __user *from,		\
+			    unsigned long size)				\
+{									\
+	int rc;								\
+									\
+	asm_inline volatile(						\
+		"	lhi	%%r0,%[spec]\n"				\
+		"0:	mvcos	%[to],%[from],%[size]\n"		\
+		"1:	lhi	%[rc],0\n"				\
+		"2:\n"							\
+		EX_TABLE_UA_FAULT(0b, 2b, %[rc])			\
+		EX_TABLE_UA_FAULT(1b, 2b, %[rc])			\
+		: [rc] "=d" (rc), [to] "=Q" (*to)			\
+		: [size] "d" (size), [from] "Q" (*from),		\
+		  [spec] "I" (0x81)					\
+		: "cc", "0");						\
+	if (likely(!rc))						\
+		return 0;						\
+	*to = 0;							\
+	return rc;							\
+}
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
+DEFINE_GET_USER_NOINSTR(char);
+DEFINE_GET_USER_NOINSTR(short);
+DEFINE_GET_USER_NOINSTR(int);
+DEFINE_GET_USER_NOINSTR(long);
+
+#define DEFINE_GET_USER(type)						\
+static __always_inline int						\
+__get_user_##type(unsigned type *to, const unsigned type __user *from,	\
+		  unsigned long size)					\
+{									\
+	int rc;								\
+									\
+	rc = __get_user_##type##_noinstr(to, from, size);		\
+	instrument_get_user(*to);					\
+	return rc;							\
+}
+
+DEFINE_GET_USER(char);
+DEFINE_GET_USER(short);
+DEFINE_GET_USER(int);
+DEFINE_GET_USER(long);
+
 #define __get_user(x, ptr)						\
 ({									\
-	int __gu_err = -EFAULT;						\
+	const __user void *____guptr = (ptr);				\
+	int __grc;							\
 									\
 	__chk_user_ptr(ptr);						\
 	switch (sizeof(*(ptr))) {					\
 	case 1: {							\
+		const unsigned char __user *__guptr = ____guptr;	\
 		unsigned char __x;					\
 									\
-		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		__grc = __get_user_char(&__x, __guptr, sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
 		break;							\
 	};								\
 	case 2: {							\
+		const unsigned short __user *__guptr = ____guptr;	\
 		unsigned short __x;					\
 									\
-		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		__grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\
 		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
 		break;							\
 	};								\
 	case 4: {							\
+		const unsigned int __user *__guptr = ____guptr;		\
 		unsigned int __x;					\
 									\
-		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		__grc = __get_user_int(&__x, __guptr, sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
 		break;							\
 	};								\
 	case 8: {							\
+		const unsigned long __user *__guptr = ____guptr;	\
 		unsigned long __x;					\
 									\
-		__gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr)));	\
+		__grc = __get_user_long(&__x, __guptr, sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *)&__x;		\
 		break;							\
 	};								\
 	default:							\
-		__get_user_bad();					\
+		__grc = __get_user_bad();				\
 		break;							\
 	}								\
-	__builtin_expect(__gu_err, 0);					\
+	__builtin_expect(__grc, 0);					\
 })
 
 #define get_user(x, ptr)						\
@@ -274,121 +364,330 @@ long __must_check strncpy_from_user(char *dst, const char __user *src, long coun
 
 long __must_check strnlen_user(const char __user *src, long count);
 
-/*
- * Zero Userspace
- */
-unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+static uaccess_kmsan_or_inline __must_check unsigned long
+__clear_user(void __user *to, unsigned long size)
+{
+	unsigned long osize;
+	int cc;
+
+	while (1) {
+		osize = size;
+		asm_inline volatile(
+			"	llilh	%%r0,%[spec]\n"
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_TO(0b, 0b)
+			EX_TABLE_UA_MVCOS_TO(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+			: [spec] "I" (0x81), [from] "Q" (*(const char *)empty_zero_page)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (__builtin_constant_p(osize) && osize <= 4096)
+			return osize - size;
+		if (CC_TRANSFORM(cc) == 0)
+			return osize - size;
+		size -= 4096;
+		to += 4096;
+	}
+}
 
-static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+static __always_inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
 {
 	might_fault();
 	return __clear_user(to, n);
 }
 
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count);
-void *s390_kernel_write(void *dst, const void *src, size_t size);
+void *__s390_kernel_write(void *dst, const void *src, size_t size);
 
-int __noreturn __put_kernel_bad(void);
+static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
+{
+	if (__is_defined(__DECOMPRESSOR))
+		return memcpy(dst, src, size);
+	return __s390_kernel_write(dst, src, size);
+}
 
-#define __put_kernel_asm(val, to, insn)					\
-({									\
-	int __rc;							\
-									\
-	asm volatile(							\
-		"0:   " insn "  %[_val],%[_to]\n"			\
-		"1:	xr	%[rc],%[rc]\n"				\
-		"2:\n"							\
-		EX_TABLE_UA_STORE(0b, 2b, %[rc])			\
-		EX_TABLE_UA_STORE(1b, 2b, %[rc])			\
-		: [rc] "=d" (__rc), [_to] "+Q" (*(to))			\
-		: [_val] "d" (val)					\
-		: "cc");						\
-	__rc;								\
-})
+void __noreturn __mvc_kernel_nofault_bad(void);
 
-#define __put_kernel_nofault(dst, src, type, err_label)			\
+#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS)
+
+#define __mvc_kernel_nofault(dst, src, type, err_label)			\
 do {									\
-	unsigned long __x = (unsigned long)(*((type *)(src)));		\
-	int __pk_err;							\
-									\
 	switch (sizeof(type)) {						\
 	case 1:								\
-		__pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
-		break;							\
 	case 2:								\
-		__pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
-		break;							\
 	case 4:								\
-		__pk_err = __put_kernel_asm(__x, (type *)(dst), "st");	\
-		break;							\
 	case 8:								\
-		__pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+		asm goto(						\
+			"0:	mvc	%O[_dst](%[_len],%R[_dst]),%[_src]\n" \
+			"1:	nopr	%%r7\n"				\
+			EX_TABLE(0b, %l[err_label])			\
+			EX_TABLE(1b, %l[err_label])			\
+			: [_dst] "=Q" (*(type *)(dst))			\
+			: [_src] "Q" (*(type *)(src)),			\
+			  [_len] "I" (sizeof(type))			\
+			:						\
+			: err_label);					\
 		break;							\
 	default:							\
-		__pk_err = __put_kernel_bad();				\
+		__mvc_kernel_nofault_bad();				\
 		break;							\
 	}								\
-	if (unlikely(__pk_err))						\
-		goto err_label;						\
 } while (0)
 
-int __noreturn __get_kernel_bad(void);
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
 
-#define __get_kernel_asm(val, from, insn)				\
-({									\
-	int __rc;							\
-									\
-	asm volatile(							\
-		"0:   " insn "  %[_val],%[_from]\n"			\
-		"1:	xr	%[rc],%[rc]\n"				\
-		"2:\n"							\
-		EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val])		\
-		EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val])		\
-		: [rc] "=d" (__rc), [_val] "=d" (val)			\
-		: [_from] "Q" (*(from))					\
-		: "cc");						\
-	__rc;								\
-})
-
-#define __get_kernel_nofault(dst, src, type, err_label)			\
+#define __mvc_kernel_nofault(dst, src, type, err_label)			\
 do {									\
-	int __gk_err;							\
+	type *(__dst) = (type *)(dst);					\
+	int __rc;							\
 									\
 	switch (sizeof(type)) {						\
-	case 1: {							\
-		unsigned char __x;					\
-									\
-		__gk_err = __get_kernel_asm(__x, (type *)(src), "ic");	\
-		*((type *)(dst)) = (type)__x;				\
-		break;							\
-	};								\
-	case 2: {							\
-		unsigned short __x;					\
-									\
-		__gk_err = __get_kernel_asm(__x, (type *)(src), "lh");	\
-		*((type *)(dst)) = (type)__x;				\
-		break;							\
-	};								\
-	case 4: {							\
-		unsigned int __x;					\
-									\
-		__gk_err = __get_kernel_asm(__x, (type *)(src), "l");	\
-		*((type *)(dst)) = (type)__x;				\
-		break;							\
-	};								\
-	case 8: {							\
-		unsigned long __x;					\
-									\
-		__gk_err = __get_kernel_asm(__x, (type *)(src), "lg");	\
-		*((type *)(dst)) = (type)__x;				\
+	case 1:								\
+	case 2:								\
+	case 4:								\
+	case 8:								\
+		asm_inline volatile(					\
+			"0:	mvc	0(%[_len],%[_dst]),%[_src]\n"	\
+			"1:	lhi	%[_rc],0\n"			\
+			"2:\n"						\
+			EX_TABLE_UA_FAULT(0b, 2b, %[_rc])		\
+			EX_TABLE_UA_FAULT(1b, 2b, %[_rc])		\
+			: [_rc] "=d" (__rc),				\
+			  "=m" (*__dst)					\
+			: [_src] "Q" (*(type *)(src)),			\
+			[_dst] "a" (__dst),				\
+			[_len] "I" (sizeof(type)));			\
+		if (__rc)						\
+			goto err_label;					\
 		break;							\
-	};								\
 	default:							\
-		__gk_err = __get_kernel_bad();				\
+		__mvc_kernel_nofault_bad();				\
 		break;							\
 	}								\
-	if (unlikely(__gk_err))						\
-		goto err_label;						\
 } while (0)
 
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */
+
+#define __get_kernel_nofault __mvc_kernel_nofault
+#define __put_kernel_nofault __mvc_kernel_nofault
+
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
+					      __uint128_t old, __uint128_t new,
+					      unsigned long key, int size)
+{
+	bool sacf_flag;
+	int rc = 0;
+
+	switch (size) {
+	case 1: {
+		unsigned int prev, shift, mask, _old, _new;
+		unsigned long count;
+
+		shift = (3 ^ (address & 3)) << 3;
+		address ^= address & 3;
+		_old = ((unsigned int)old & 0xff) << shift;
+		_new = ((unsigned int)new & 0xff) << shift;
+		mask = ~(0xff << shift);
+		sacf_flag = enable_sacf_uaccess();
+		asm_inline volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"	llill	%[count],%[max_loops]\n"
+			"0:	l	%[prev],%[address]\n"
+			"1:	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"2:	lr	%[tmp],%[prev]\n"
+			"3:	cs	%[prev],%[new],%[address]\n"
+			"4:	jnl	5f\n"
+			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
+			"	nr	%[tmp],%[mask]\n"
+			"	jnz	5f\n"
+			"	brct	%[count],2b\n"
+			"5:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "=&d" (prev),
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (_old),
+			  [new] "+&d" (_new),
+			  [mask] "+&d" (mask),
+			  [count] "=a" (count)
+			: [key] "%[count]" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY),
+			  [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+			: "memory", "cc");
+		disable_sacf_uaccess(sacf_flag);
+		*(unsigned char *)uval = prev >> shift;
+		if (!count)
+			rc = -EAGAIN;
+		return rc;
+	}
+	case 2: {
+		unsigned int prev, shift, mask, _old, _new;
+		unsigned long count;
+
+		shift = (2 ^ (address & 2)) << 3;
+		address ^= address & 2;
+		_old = ((unsigned int)old & 0xffff) << shift;
+		_new = ((unsigned int)new & 0xffff) << shift;
+		mask = ~(0xffff << shift);
+		sacf_flag = enable_sacf_uaccess();
+		asm_inline volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"	llill	%[count],%[max_loops]\n"
+			"0:	l	%[prev],%[address]\n"
+			"1:	nr	%[prev],%[mask]\n"
+			"	xilf	%[mask],0xffffffff\n"
+			"	or	%[new],%[prev]\n"
+			"	or	%[prev],%[tmp]\n"
+			"2:	lr	%[tmp],%[prev]\n"
+			"3:	cs	%[prev],%[new],%[address]\n"
+			"4:	jnl	5f\n"
+			"	xr	%[tmp],%[prev]\n"
+			"	xr	%[new],%[tmp]\n"
+			"	nr	%[tmp],%[mask]\n"
+			"	jnz	5f\n"
+			"	brct	%[count],2b\n"
+			"5:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "=&d" (prev),
+			  [address] "+Q" (*(int *)address),
+			  [tmp] "+&d" (_old),
+			  [new] "+&d" (_new),
+			  [mask] "+&d" (mask),
+			  [count] "=a" (count)
+			: [key] "%[count]" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY),
+			  [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+			: "memory", "cc");
+		disable_sacf_uaccess(sacf_flag);
+		*(unsigned short *)uval = prev >> shift;
+		if (!count)
+			rc = -EAGAIN;
+		return rc;
+	}
+	case 4:	{
+		unsigned int prev = old;
+
+		sacf_flag = enable_sacf_uaccess();
+		asm_inline volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"0:	cs	%[prev],%[new],%[address]\n"
+			"1:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "+&d" (prev),
+			  [address] "+Q" (*(int *)address)
+			: [new] "d" ((unsigned int)new),
+			  [key] "a" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY)
+			: "memory", "cc");
+		disable_sacf_uaccess(sacf_flag);
+		*(unsigned int *)uval = prev;
+		return rc;
+	}
+	case 8: {
+		unsigned long prev = old;
+
+		sacf_flag = enable_sacf_uaccess();
+		asm_inline volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"0:	csg	%[prev],%[new],%[address]\n"
+			"1:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "+&d" (prev),
+			  [address] "+QS" (*(long *)address)
+			: [new] "d" ((unsigned long)new),
+			  [key] "a" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY)
+			: "memory", "cc");
+		disable_sacf_uaccess(sacf_flag);
+		*(unsigned long *)uval = prev;
+		return rc;
+	}
+	case 16: {
+		__uint128_t prev = old;
+
+		sacf_flag = enable_sacf_uaccess();
+		asm_inline volatile(
+			"	spka	0(%[key])\n"
+			"	sacf	256\n"
+			"0:	cdsg	%[prev],%[new],%[address]\n"
+			"1:	sacf	768\n"
+			"	spka	%[default_key]\n"
+			EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+			EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+			: [rc] "+&d" (rc),
+			  [prev] "+&d" (prev),
+			  [address] "+QS" (*(__int128_t *)address)
+			: [new] "d" (new),
+			  [key] "a" (key << 4),
+			  [default_key] "J" (PAGE_DEFAULT_KEY)
+			: "memory", "cc");
+		disable_sacf_uaccess(sacf_flag);
+		*(__uint128_t *)uval = prev;
+		return rc;
+	}
+	}
+	__cmpxchg_user_key_called_with_bad_pointer();
+	return rc;
+}
+
+/**
+ * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
+ * @ptr: User space address of value to compare to @old and exchange with
+ *	 @new. Must be aligned to sizeof(*@ptr).
+ * @uval: Address where the old value of *@ptr is written to.
+ * @old: Old value. Compared to the content pointed to by @ptr in order to
+ *	 determine if the exchange occurs. The old value read from *@ptr is
+ *	 written to *@uval.
+ * @new: New value to place at *@ptr.
+ * @key: Access key to use for checking storage key protection.
+ *
+ * Perform a cmpxchg on a user space target, honoring storage key protection.
+ * @key alone determines how key checking is performed; neither
+ * storage-protection-override nor fetch-protection-override applies.
+ * The caller must compare *@uval and @old to determine if values have been
+ * exchanged. In case of an exception *@uval is set to zero.
+ *
+ * Return:     0: cmpxchg executed
+ *	       -EFAULT: an exception happened when trying to access *@ptr
+ *	       -EAGAIN: maxed out number of retries (byte and short only)
+ */
+#define cmpxchg_user_key(ptr, uval, old, new, key)			\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(uval) __uval = (uval);				\
+									\
+	BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval)));		\
+	might_fault();							\
+	__chk_user_ptr(__ptr);						\
+	__cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval),	\
+			   (old), (new), (key), sizeof(*(__ptr)));	\
+})
+
 #endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 4260bc5ce7f8..70fc671397da 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -35,6 +35,5 @@
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
-#define __ARCH_WANT_SYS_CLONE3
 
 #endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 0bf06f1682d8..b8ecf04e3468 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -4,7 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/ftrace.h>
-#include <linux/kprobes.h>
+#include <linux/rethook.h>
 #include <linux/llist.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
@@ -43,13 +43,15 @@ struct unwind_state {
 	bool error;
 };
 
-/* Recover the return address modified by kretprobe and ftrace_graph. */
+/* Recover the return address modified by rethook and ftrace_graph. */
 static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
 						    unsigned long ip)
 {
-	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
-	if (is_kretprobe_trampoline(ip))
-		ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
+	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
+#ifdef CONFIG_RETHOOK
+	if (is_rethook_trampoline(ip))
+		ip = rethook_find_ret_addr(state->task, state->sp, &state->kr_cur);
+#endif
 	return ip;
 }
 
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index cfea7b77a5b8..8018549a1ad2 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -2,7 +2,7 @@
 /*
  * Ultravisor Interfaces
  *
- * Copyright IBM Corp. 2019, 2022
+ * Copyright IBM Corp. 2019, 2024
  *
  * Author(s):
  *	Vasily Gorbik <gor@linux.ibm.com>
@@ -16,7 +16,7 @@
 #include <linux/bug.h>
 #include <linux/sched.h>
 #include <asm/page.h>
-#include <asm/gmap.h>
+#include <asm/asm.h>
 
 #define UVC_CC_OK	0
 #define UVC_CC_ERROR	1
@@ -28,12 +28,15 @@
 #define UVC_RC_INV_STATE	0x0003
 #define UVC_RC_INV_LEN		0x0005
 #define UVC_RC_NO_RESUME	0x0007
+#define UVC_RC_MORE_DATA	0x0100
 #define UVC_RC_NEED_DESTROY	0x8000
 
 #define UVC_CMD_QUI			0x0001
+#define UVC_CMD_QUERY_KEYS		0x0002
 #define UVC_CMD_INIT_UV			0x000f
 #define UVC_CMD_CREATE_SEC_CONF		0x0100
 #define UVC_CMD_DESTROY_SEC_CONF	0x0101
+#define UVC_CMD_DESTROY_SEC_CONF_FAST	0x0102
 #define UVC_CMD_CREATE_SEC_CPU		0x0120
 #define UVC_CMD_DESTROY_SEC_CPU		0x0121
 #define UVC_CMD_CONV_TO_SEC_STOR	0x0200
@@ -50,9 +53,17 @@
 #define UVC_CMD_SET_UNSHARE_ALL		0x0340
 #define UVC_CMD_PIN_PAGE_SHARED		0x0341
 #define UVC_CMD_UNPIN_PAGE_SHARED	0x0342
+#define UVC_CMD_DUMP_INIT		0x0400
+#define UVC_CMD_DUMP_CONF_STOR_STATE	0x0401
+#define UVC_CMD_DUMP_CPU		0x0402
+#define UVC_CMD_DUMP_COMPLETE		0x0403
 #define UVC_CMD_SET_SHARED_ACCESS	0x1000
 #define UVC_CMD_REMOVE_SHARED_ACCESS	0x1001
 #define UVC_CMD_RETR_ATTEST		0x1020
+#define UVC_CMD_ADD_SECRET		0x1031
+#define UVC_CMD_LIST_SECRETS		0x1033
+#define UVC_CMD_LOCK_SECRETS		0x1034
+#define UVC_CMD_RETR_SECRET		0x1035
 
 /* Bits in installed uv calls */
 enum uv_cmds_inst {
@@ -77,12 +88,24 @@ enum uv_cmds_inst {
 	BIT_UVC_CMD_UNSHARE_ALL = 20,
 	BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
 	BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+	BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23,
+	BIT_UVC_CMD_DUMP_INIT = 24,
+	BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
+	BIT_UVC_CMD_DUMP_CPU = 26,
+	BIT_UVC_CMD_DUMP_COMPLETE = 27,
 	BIT_UVC_CMD_RETR_ATTEST = 28,
+	BIT_UVC_CMD_ADD_SECRET = 29,
+	BIT_UVC_CMD_LIST_SECRETS = 30,
+	BIT_UVC_CMD_LOCK_SECRETS = 31,
+	BIT_UVC_CMD_RETR_SECRET = 33,
+	BIT_UVC_CMD_QUERY_KEYS = 34,
 };
 
 enum uv_feat_ind {
 	BIT_UV_FEAT_MISC = 0,
 	BIT_UV_FEAT_AIV = 1,
+	BIT_UV_FEAT_AP = 4,
+	BIT_UV_FEAT_AP_INTR = 5,
 };
 
 struct uv_cb_header {
@@ -107,12 +130,42 @@ struct uv_cb_qui {
 	u32 reserved70[3];			/* 0x0070 */
 	u32 max_num_sec_conf;			/* 0x007c */
 	u64 max_guest_stor_addr;		/* 0x0080 */
-	u8  reserved88[158 - 136];		/* 0x0088 */
+	u8  reserved88[0x9e - 0x88];		/* 0x0088 */
 	u16 max_guest_cpu_id;			/* 0x009e */
 	u64 uv_feature_indications;		/* 0x00a0 */
-	u8  reserveda8[200 - 168];		/* 0x00a8 */
+	u64 reserveda8;				/* 0x00a8 */
+	u64 supp_se_hdr_versions;		/* 0x00b0 */
+	u64 supp_se_hdr_pcf;			/* 0x00b8 */
+	u64 reservedc0;				/* 0x00c0 */
+	u64 conf_dump_storage_state_len;	/* 0x00c8 */
+	u64 conf_dump_finalize_len;		/* 0x00d0 */
+	u64 reservedd8;				/* 0x00d8 */
+	u64 supp_att_req_hdr_ver;		/* 0x00e0 */
+	u64 supp_att_pflags;			/* 0x00e8 */
+	u64 reservedf0;				/* 0x00f0 */
+	u64 supp_add_secret_req_ver;		/* 0x00f8 */
+	u64 supp_add_secret_pcf;		/* 0x0100 */
+	u64 supp_secret_types;			/* 0x0108 */
+	u16 max_assoc_secrets;			/* 0x0110 */
+	u16 max_retr_secrets;			/* 0x0112 */
+	u8 reserved114[0x120 - 0x114];		/* 0x0114 */
 } __packed __aligned(8);
 
+struct uv_key_hash {
+	u64 dword[4];
+} __packed __aligned(8);
+
+#define UVC_QUERY_KEYS_IDX_HK		0
+#define UVC_QUERY_KEYS_IDX_BACK_HK	1
+
+/* Query Ultravisor Keys */
+struct uv_cb_query_keys {
+	struct uv_cb_header header;		/* 0x0000 */
+	u64 reserved08[3];			/* 0x0008 */
+	struct uv_key_hash key_hashes[15];	/* 0x0020 */
+} __packed __aligned(8);
+static_assert(sizeof(struct uv_cb_query_keys) == 0x200);
+
 /* Initialize Ultravisor */
 struct uv_cb_init {
 	struct uv_cb_header header;
@@ -129,7 +182,15 @@ struct uv_cb_cgc {
 	u64 guest_handle;
 	u64 conf_base_stor_origin;
 	u64 conf_virt_stor_origin;
-	u64 reserved30;
+	u8  reserved30[6];
+	union {
+		struct {
+			u16 : 14;
+			u16 ap_instr_intr : 1;
+			u16 ap_allow_instr : 1;
+		};
+		u16 raw;
+	} flags;
 	u64 guest_stor_origin;
 	u64 guest_stor_len;
 	u64 guest_sca;
@@ -213,6 +274,14 @@ struct uv_cb_nodata {
 	u64 reserved20[4];
 } __packed __aligned(8);
 
+/* Destroy Configuration Fast */
+struct uv_cb_destroy_fast {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 handle;
+	u64 reserved20[5];
+} __packed __aligned(8);
+
 /* Set Shared Access */
 struct uv_cb_share {
 	struct uv_cb_header header;
@@ -240,18 +309,139 @@ struct uv_cb_attest {
 	u64 reserved168[4];		/* 0x0168 */
 } __packed __aligned(8);
 
+struct uv_cb_dump_cpu {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u64 dump_area_origin;
+	u64 reserved28[5];
+} __packed __aligned(8);
+
+struct uv_cb_dump_stor_state {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 config_handle;
+	u64 dump_area_origin;
+	u64 gaddr;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+struct uv_cb_dump_complete {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 config_handle;
+	u64 dump_area_origin;
+	u64 reserved30[5];
+} __packed __aligned(8);
+
+/*
+ * A common UV call struct for pv guests that contains a single address
+ * Examples:
+ * Add Secret
+ */
+struct uv_cb_guest_addr {
+	struct uv_cb_header header;
+	u64 reserved08[3];
+	u64 addr;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+#define UVC_RC_RETR_SECR_BUF_SMALL	0x0109
+#define UVC_RC_RETR_SECR_STORE_EMPTY	0x010f
+#define UVC_RC_RETR_SECR_INV_IDX	0x0110
+#define UVC_RC_RETR_SECR_INV_SECRET	0x0111
+
+struct uv_cb_retr_secr {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u16 secret_idx;
+	u16 reserved1a;
+	u32 buf_size;
+	u64 buf_addr;
+	u64 reserved28[4];
+}  __packed __aligned(8);
+
+struct uv_cb_list_secrets {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u8  reserved18[6];
+	u16 start_idx;
+	u64 list_addr;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+enum uv_secret_types {
+	UV_SECRET_INVAL = 0x0,
+	UV_SECRET_NULL = 0x1,
+	UV_SECRET_ASSOCIATION = 0x2,
+	UV_SECRET_PLAIN = 0x3,
+	UV_SECRET_AES_128 = 0x4,
+	UV_SECRET_AES_192 = 0x5,
+	UV_SECRET_AES_256 = 0x6,
+	UV_SECRET_AES_XTS_128 = 0x7,
+	UV_SECRET_AES_XTS_256 = 0x8,
+	UV_SECRET_HMAC_SHA_256 = 0x9,
+	UV_SECRET_HMAC_SHA_512 = 0xa,
+	/* 0x0b - 0x10 reserved */
+	UV_SECRET_ECDSA_P256 = 0x11,
+	UV_SECRET_ECDSA_P384 = 0x12,
+	UV_SECRET_ECDSA_P521 = 0x13,
+	UV_SECRET_ECDSA_ED25519 = 0x14,
+	UV_SECRET_ECDSA_ED448 = 0x15,
+};
+
+/**
+ * struct uv_secret_list_item_hdr - UV secret metadata.
+ * @index: Index of the secret in the secret list.
+ * @type: Type of the secret. See `enum uv_secret_types`.
+ * @length: Length of the stored secret.
+ */
+struct uv_secret_list_item_hdr {
+	u16 index;
+	u16 type;
+	u32 length;
+} __packed __aligned(8);
+
+#define UV_SECRET_ID_LEN 32
+/**
+ * struct uv_secret_list_item - UV secret entry.
+ * @hdr: The metadata of this secret.
+ * @id: The ID of this secret, not the secret itself.
+ */
+struct uv_secret_list_item {
+	struct uv_secret_list_item_hdr hdr;
+	u64 reserverd08;
+	u8 id[UV_SECRET_ID_LEN];
+} __packed __aligned(8);
+
+/**
+ * struct uv_secret_list - UV secret-metadata list.
+ * @num_secr_stored: Number of secrets stored in this list.
+ * @total_num_secrets: Number of secrets stored in the UV for this guest.
+ * @next_secret_idx: positive number if there are more secrets available or zero.
+ * @secrets: Up to 85 UV-secret metadata entries.
+ */
+struct uv_secret_list {
+	u16 num_secr_stored;
+	u16 total_num_secrets;
+	u16 next_secret_idx;
+	u16 reserved_06;
+	u64 reserved_08;
+	struct uv_secret_list_item secrets[85];
+} __packed __aligned(8);
+static_assert(sizeof(struct uv_secret_list) == PAGE_SIZE);
+
 static inline int __uv_call(unsigned long r1, unsigned long r2)
 {
 	int cc;
 
 	asm volatile(
-		"	.insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=d" (cc)
+		"	.insn	 rrf,0xb9a40000,%[r1],%[r2],0,0\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
 		: [r1] "a" (r1), [r2] "a" (r2)
-		: "memory", "cc");
-	return cc;
+		: CC_CLOBBER_LIST("memory"));
+	return CC_TRANSFORM(cc);
 }
 
 static inline int uv_call(unsigned long r1, unsigned long r2)
@@ -296,6 +486,48 @@ static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
 	return cc ? -EINVAL : 0;
 }
 
+/**
+ * uv_list_secrets() - Do a List Secrets UVC.
+ *
+ * @buf: Buffer to write list into; size of one page.
+ * @start_idx: The smallest index that should be included in the list.
+ *		For the first invocation use 0.
+ * @rc: Pointer to store the return code or NULL.
+ * @rrc: Pointer to store the return reason code or NULL.
+ *
+ * This function calls the List Secrets UVC. The result is written into `buf`,
+ * that needs to be at least one page of writable memory.
+ * `buf` is laid out as a %struct uv_secret_list:
+ * * a fixed list header (counts and `next_secret_idx`)
+ * * up to 85 %struct uv_secret_list_item entries
+ *
+ * For `start_idx` use _0_ for the first call. If there are more secrets available
+ * but could not fit into the page then `rc` is `UVC_RC_MORE_DATA`.
+ * In this case use `uv_secret_list.next_secret_idx` for `start_idx`.
+ *
+ * Context: might sleep.
+ *
+ * Return: The UVC condition code.
+ */
+static inline int uv_list_secrets(struct uv_secret_list *buf, u16 start_idx,
+				  u16 *rc, u16 *rrc)
+{
+	struct uv_cb_list_secrets uvcb = {
+		.header.len = sizeof(uvcb),
+		.header.cmd = UVC_CMD_LIST_SECRETS,
+		.start_idx = start_idx,
+		.list_addr = (u64)buf,
+	};
+	int cc = uv_call_sched(0, (u64)&uvcb);
+
+	if (rc)
+		*rc = uvcb.header.rc;
+	if (rrc)
+		*rrc = uvcb.header.rrc;
+
+	return cc;
+}
+
 struct uv_info {
 	unsigned long inst_calls_list[4];
 	unsigned long uv_base_stor_len;
@@ -307,11 +539,28 @@ struct uv_info {
 	unsigned int max_num_sec_conf;
 	unsigned short max_guest_cpu_id;
 	unsigned long uv_feature_indications;
+	unsigned long supp_se_hdr_ver;
+	unsigned long supp_se_hdr_pcf;
+	unsigned long conf_dump_storage_state_len;
+	unsigned long conf_dump_finalize_len;
+	unsigned long supp_att_req_hdr_ver;
+	unsigned long supp_att_pflags;
+	unsigned long supp_add_secret_req_ver;
+	unsigned long supp_add_secret_pcf;
+	unsigned long supp_secret_types;
+	unsigned short max_assoc_secrets;
+	unsigned short max_retr_secrets;
 };
 
 extern struct uv_info uv_info;
 
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+static inline bool uv_has_feature(u8 feature_bit)
+{
+	if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+		return false;
+	return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
 extern int prot_virt_guest;
 
 static inline int is_prot_virt_guest(void)
@@ -339,7 +588,10 @@ static inline int share(unsigned long addr, u16 cmd)
 
 	if (!uv_call(0, (u64)&uvcb))
 		return 0;
-	return -EINVAL;
+	pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n",
+	       uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare",
+	       uvcb.header.rc, uvcb.header.rrc);
+	panic("System security cannot be guaranteed unless the system panics now.\n");
 }
 
 /*
@@ -363,13 +615,11 @@ static inline int uv_remove_shared(unsigned long addr)
 	return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
 }
 
-#else
-#define is_prot_virt_guest() 0
-static inline int uv_set_shared(unsigned long addr) { return 0; }
-static inline int uv_remove_shared(unsigned long addr) { return 0; }
-#endif
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+		   struct uv_secret_list *list,
+		   struct uv_secret_list_item_hdr *secret);
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size);
 
-#if IS_ENABLED(CONFIG_KVM)
 extern int prot_virt_host;
 
 static inline int is_prot_virt_host(void)
@@ -377,31 +627,14 @@ static inline int is_prot_virt_host(void)
 	return prot_virt_host;
 }
 
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int uv_destroy_owned_page(unsigned long paddr);
+int uv_pin_shared(unsigned long paddr);
+int uv_destroy_folio(struct folio *folio);
+int uv_destroy_pte(pte_t pte);
+int uv_convert_from_secure_pte(pte_t pte);
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb);
 int uv_convert_from_secure(unsigned long paddr);
-int uv_convert_owned_from_secure(unsigned long paddr);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int uv_convert_from_secure_folio(struct folio *folio);
 
 void setup_uv(void);
-#else
-#define is_prot_virt_host() 0
-static inline void setup_uv(void) {}
-
-static inline int uv_destroy_owned_page(unsigned long paddr)
-{
-	return 0;
-}
-
-static inline int uv_convert_from_secure(unsigned long paddr)
-{
-	return 0;
-}
-
-static inline int uv_convert_owned_from_secure(unsigned long paddr)
-{
-	return 0;
-}
-#endif
 
 #endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/include/asm/vdso-symbols.h b/arch/s390/include/asm/vdso-symbols.h
new file mode 100644
index 000000000000..0df17574d788
--- /dev/null
+++ b/arch/s390/include/asm/vdso-symbols.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_VDSO_SYMBOLS_H__
+#define __S390_VDSO_SYMBOLS_H__
+
+#include <generated/vdso64-offsets.h>
+#ifdef CONFIG_COMPAT
+#include <generated/vdso32-offsets.h>
+#endif
+
+#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
+#ifdef CONFIG_COMPAT
+#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
+#else
+#define VDSO32_SYMBOL(tsk, name) (-1UL)
+#endif
+
+#endif /* __S390_VDSO_SYMBOLS_H__ */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 53165aa7813a..420a073fdde5 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -6,28 +6,11 @@
 
 #ifndef __ASSEMBLY__
 
-#include <generated/vdso64-offsets.h>
-#ifdef CONFIG_COMPAT
-#include <generated/vdso32-offsets.h>
-#endif
-
-#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name))
-#ifdef CONFIG_COMPAT
-#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name))
-#else
-#define VDSO32_SYMBOL(tsk, name) (-1UL)
-#endif
-
-extern struct vdso_data *vdso_data;
-
 int vdso_getcpu_init(void);
 
 #endif /* __ASSEMBLY__ */
 
-/* Default link address for the vDSO */
-#define VDSO_LBASE	0
-
-#define __VVAR_PAGES	2
+#define __VDSO_PAGES	4
 
 #define VDSO_VERSION_STRING	LINUX_2.6.29
 
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
deleted file mode 100644
index 73ee89142666..000000000000
--- a/arch/s390/include/asm/vdso/data.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __S390_ASM_VDSO_DATA_H
-#define __S390_ASM_VDSO_DATA_H
-
-#include <linux/types.h>
-#include <vdso/datapage.h>
-
-struct arch_vdso_data {
-	__s64 tod_steering_delta;
-	__u64 tod_steering_end;
-};
-
-#endif /* __S390_ASM_VDSO_DATA_H */
diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..f8713ce39bb2
--- /dev/null
+++ b/arch/s390/include/asm/vdso/getrandom.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer:	Destination buffer to fill with random bytes.
+ * @len:	Size of @buffer in bytes.
+ * @flags:	Zero or more GRND_* flags.
+ * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+	return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index db84942eb78f..fb4564308e9d 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -6,23 +6,15 @@
 
 #define VDSO_HAS_CLOCK_GETRES 1
 
+#define VDSO_DELTA_NOMASK 1
+
 #include <asm/syscall.h>
 #include <asm/timex.h>
 #include <asm/unistd.h>
 #include <linux/compiler.h>
 
-#define vdso_calc_delta __arch_vdso_calc_delta
-static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
-	return (cycles - last) * mult;
-}
-
-static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
-{
-	return _vdso_data;
-}
 
-static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd)
 {
 	u64 adj, now;
 
@@ -52,12 +44,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
 	return syscall2(__NR_clock_getres, (long)clkid, (long)ts);
 }
 
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
-	return _timens_data;
-}
-#endif
-
 #endif
diff --git a/arch/s390/include/asm/vdso/time_data.h b/arch/s390/include/asm/vdso/time_data.h
new file mode 100644
index 000000000000..8a08752422e6
--- /dev/null
+++ b/arch/s390/include/asm/vdso/time_data.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_VDSO_TIME_DATA_H
+#define __S390_ASM_VDSO_TIME_DATA_H
+
+#include <linux/types.h>
+
+struct arch_vdso_time_data {
+	__s64 tod_steering_delta;
+	__u64 tod_steering_end;
+};
+
+#endif /* __S390_ASM_VDSO_TIME_DATA_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
index 6c67c08cefdd..d346ebe51301 100644
--- a/arch/s390/include/asm/vdso/vsyscall.h
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -5,18 +5,8 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/hrtimer.h>
-#include <linux/timekeeper_internal.h>
 #include <vdso/datapage.h>
 #include <asm/vdso.h>
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-
-static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
-{
-	return vdso_data;
-}
-#define __arch_get_k_vdso_data __s390_get_k_vdso_data
 
 /* The asm-generic header needs to be included after the definitions above */
 #include <asm-generic/vdso/vsyscall.h>
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
deleted file mode 100644
index 605dc46bac5e..000000000000
--- a/arch/s390/include/asm/vga.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_VGA_H
-#define _ASM_S390_VGA_H
-
-/* Avoid compile errors due to missing asm/vga.h */
-
-#endif /* _ASM_S390_VGA_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index fe17e448c0c5..9d25fb35a042 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -2,20 +2,22 @@
 #ifndef _S390_VTIME_H
 #define _S390_VTIME_H
 
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
 static inline void update_timer_sys(void)
 {
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
-	S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
-	S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+	struct lowcore *lc = get_lowcore();
+
+	lc->system_timer += lc->last_update_timer - lc->exit_timer;
+	lc->user_timer += lc->exit_timer - lc->sys_enter_timer;
+	lc->last_update_timer = lc->sys_enter_timer;
 }
 
 static inline void update_timer_mcck(void)
 {
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
-	S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
-	S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
+	struct lowcore *lc = get_lowcore();
+
+	lc->system_timer += lc->last_update_timer - lc->exit_timer;
+	lc->user_timer += lc->exit_timer - lc->mcck_enter_timer;
+	lc->last_update_timer = lc->mcck_enter_timer;
 }
 
 #endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/asm/word-at-a-time.h b/arch/s390/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..eaa19dee7699
--- /dev/null
+++ b/arch/s390/include/asm/word-at-a-time.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
+#include <asm/asm-extable.h>
+#include <asm/bitsperlong.h>
+
+struct word_at_a_time {
+	const unsigned long bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x7f) }
+
+static inline unsigned long prep_zero_mask(unsigned long val, unsigned long data, const struct word_at_a_time *c)
+{
+	return data;
+}
+
+static inline unsigned long create_zero_mask(unsigned long data)
+{
+	return __fls(data);
+}
+
+static inline unsigned long find_zero(unsigned long data)
+{
+	return (data ^ (BITS_PER_LONG - 1)) >> 3;
+}
+
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+{
+	unsigned long mask = (val & c->bits) + c->bits;
+
+	*data = ~(mask | val | c->bits);
+	return *data;
+}
+
+static inline unsigned long zero_bytemask(unsigned long data)
+{
+	return ~1UL << data;
+}
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long data;
+
+	asm_inline volatile(
+		"0:	lg	%[data],0(%[addr])\n"
+		"1:	nopr	%%r7\n"
+		EX_TABLE_ZEROPAD(0b, 1b, %[data], %[addr])
+		EX_TABLE_ZEROPAD(1b, 1b, %[data], %[addr])
+		: [data] "=d" (data)
+		: [addr] "a" (addr), "m" (*(unsigned long *)addr));
+	return data;
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h
index ecbe94941403..115434ab98fb 100644
--- a/arch/s390/include/uapi/asm/cmb.h
+++ b/arch/s390/include/uapi/asm/cmb.h
@@ -31,7 +31,7 @@
 struct cmbdata {
 	__u64 size;
 	__u64 elapsed_time;
- /* basic and exended format: */
+ /* basic and extended format: */
 	__u64 ssch_rsch_count;
 	__u64 sample_count;
 	__u64 device_connect_time;
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 9ec86fae9980..7c364b33c84d 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -24,7 +24,7 @@
 /*
  * struct dasd_information2_t
  * represents any data about the device, which is visible to userspace.
- *  including foramt and featueres.
+ *  including format and features.
  */
 typedef struct dasd_information2_t {
 	unsigned int devno;	    /* S/390 devno */
@@ -78,6 +78,7 @@ typedef struct dasd_information2_t {
  * 0x040: give access to raw eckd data
  * 0x080: enable discard support
  * 0x100: enable autodisable for IFCC errors (default)
+ * 0x200: enable requeue of all requests on autoquiesce
  */
 #define DASD_FEATURE_READONLY	      0x001
 #define DASD_FEATURE_USEDIAG	      0x002
@@ -88,6 +89,7 @@ typedef struct dasd_information2_t {
 #define DASD_FEATURE_USERAW	      0x040
 #define DASD_FEATURE_DISCARD	      0x080
 #define DASD_FEATURE_PATH_AUTODISABLE 0x100
+#define DASD_FEATURE_REQUEUEQUIESCE   0x200
 #define DASD_FEATURE_DEFAULT	      DASD_FEATURE_PATH_AUTODISABLE
 
 #define DASD_PARTN_BITS 2
@@ -183,6 +185,18 @@ typedef struct format_data_t {
 } format_data_t;
 
 /*
+ * struct dasd_copypair_swap_data_t
+ * represents all data necessary to issue a swap of the copy pair relation
+ */
+struct dasd_copypair_swap_data_t {
+	char primary[20]; /* BUSID of primary */
+	char secondary[20]; /* BUSID of secondary */
+
+	/* Reserved for future updates. */
+	__u8 reserved[64];
+};
+
+/*
  * values to be used for format_data_t.intensity
  * 0/8: normal format
  * 1/9: also write record zero
@@ -280,7 +294,7 @@ struct dasd_snid_ioctl_data {
 /********************************************************************************
  * SECTION: Definition of IOCTLs
  *
- * Here ist how the ioctl-nr should be used:
+ * Here is how the ioctl-nr should be used:
  *    0 -   31   DASD driver itself
  *   32 -  239   still open
  *  240 -  255	 reserved for EMC
@@ -326,6 +340,8 @@ struct dasd_snid_ioctl_data {
 #define BIODASDSATTR   _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
 /* Release Allocated Space */
 #define BIODASDRAS     _IOW(DASD_IOCTL_LETTER, 3, format_data_t)
+/* Swap copy pair relation */
+#define BIODASDCOPYPAIRSWAP _IOW(DASD_IOCTL_LETTER, 4, struct dasd_copypair_swap_data_t)
 
 /* Get Sense Path Group ID (SNID) data */
 #define BIODASDSNID    _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h
new file mode 100644
index 000000000000..b7e6ccb4ff6e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/diag.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Diag ioctls and their associated structure definitions.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#ifndef __S390_UAPI_ASM_DIAG_H
+#define __S390_UAPI_ASM_DIAG_H
+
+#include <linux/types.h>
+
+#define DIAG_MAGIC_STR 'D'
+
+struct diag324_pib {
+	__u64 address;
+	__u64 sequence;
+};
+
+struct diag310_memtop {
+	__u64 address;
+	__u64 nesting_lvl;
+};
+
+/* Diag ioctl definitions */
+#define DIAG324_GET_PIBBUF	_IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib)
+#define DIAG324_GET_PIBLEN	_IOR(DIAG_MAGIC_STR, 0x78, size_t)
+#define DIAG310_GET_STRIDE	_IOR(DIAG_MAGIC_STR, 0x79, size_t)
+#define DIAG310_GET_MEMTOPLEN	_IOWR(DIAG_MAGIC_STR, 0x7a, size_t)
+#define DIAG310_GET_MEMTOPBUF	_IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop)
+
+#endif /* __S390_UAPI_ASM_DIAG_H */
diff --git a/arch/s390/include/uapi/asm/fs3270.h b/arch/s390/include/uapi/asm/fs3270.h
new file mode 100644
index 000000000000..c4bc1108af6a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/fs3270.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_FS3270_H
+#define __ASM_S390_UAPI_FS3270_H
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+/* ioctls for fullscreen 3270 */
+#define TUBICMD		_IO('3', 3)	/* set ccw command for fs reads. */
+#define TUBOCMD		_IO('3', 4)	/* set ccw command for fs writes. */
+#define TUBGETI		_IO('3', 7)	/* get ccw command for fs reads. */
+#define TUBGETO		_IO('3', 8)	/* get ccw command for fs writes. */
+#define TUBGETMOD	_IO('3', 13)	/* get characteristics like model, cols, rows */
+
+/* For TUBGETMOD */
+struct raw3270_iocb {
+	__u16 model;
+	__u16 line_cnt;
+	__u16 col_cnt;
+	__u16 pf_cnt;
+	__u16 re_cnt;
+	__u16 map;
+};
+
+#endif /* __ASM_S390_UAPI_FS3270_H */
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index d1ecd5d722a0..2cd28af50dd4 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -27,6 +27,7 @@ enum ipl_pbt {
 	IPL_PBT_FCP = 0,
 	IPL_PBT_SCP_DATA = 1,
 	IPL_PBT_CCW = 2,
+	IPL_PBT_ECKD = 3,
 	IPL_PBT_NVME = 4,
 };
 
@@ -111,6 +112,34 @@ struct ipl_pb0_ccw {
 	__u8  reserved5[8];
 } __packed;
 
+/* IPL Parameter Block 0 for ECKD */
+struct ipl_pb0_eckd {
+	__u32 len;
+	__u8  pbt;
+	__u8  reserved1[3];
+	__u32 reserved2[78];
+	__u8  opt;
+	__u8  reserved4[4];
+	__u8  reserved5:5;
+	__u8  ssid:3;
+	__u16 devno;
+	__u32 reserved6[5];
+	__u32 bootprog;
+	__u8  reserved7[12];
+	struct {
+		__u16 cyl;
+		__u8 head;
+		__u8 record;
+		__u32 reserved;
+	} br_chr __packed;
+	__u32 scp_data_len;
+	__u8  reserved8[260];
+	__u8  scp_data[];
+} __packed;
+
+#define IPL_PB0_ECKD_OPT_IPL	0x10
+#define IPL_PB0_ECKD_OPT_DUMP	0x20
+
 #define IPL_PB0_CCW_VM_FLAG_NSS		0x80
 #define IPL_PB0_CCW_VM_FLAG_VP		0x40
 
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 7a6b14874d65..60345dd2cba2 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -12,7 +12,320 @@
 #include <linux/types.h>
 
 #define __KVM_S390
-#define __KVM_HAVE_GUEST_DEBUG
+
+struct kvm_s390_skeys {
+	__u64 start_gfn;
+	__u64 count;
+	__u64 skeydata_addr;
+	__u32 flags;
+	__u32 reserved[9];
+};
+
+#define KVM_S390_CMMA_PEEK (1 << 0)
+
+/**
+ * struct kvm_s390_cmma_log - Used for CMMA migration.
+ *
+ * Used both for input and output.
+ *
+ * @start_gfn: Guest page number to start from.
+ * @count: Size of the result buffer.
+ * @flags: Control operation mode via KVM_S390_CMMA_* flags
+ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
+ *             pages are still remaining.
+ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
+ *        in the PGSTE.
+ * @values: Pointer to the values buffer.
+ *
+ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
+ */
+struct kvm_s390_cmma_log {
+	__u64 start_gfn;
+	__u32 count;
+	__u32 flags;
+	union {
+		__u64 remaining;
+		__u64 mask;
+	};
+	__u64 values;
+};
+
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+	/* in */
+	__u64 gaddr;		/* the guest address */
+	__u64 flags;		/* flags */
+	__u32 size;		/* amount of bytes */
+	__u32 op;		/* type of operation */
+	__u64 buf;		/* buffer in userspace */
+	union {
+		struct {
+			__u8 ar;	/* the access register number */
+			__u8 key;	/* access key, ignored if flag unset */
+			__u8 pad1[6];	/* ignored */
+			__u64 old_addr;	/* ignored if cmpxchg flag unset */
+		};
+		__u32 sida_offset; /* offset into the sida */
+		__u8 reserved[32]; /* ignored */
+	};
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ	0
+#define KVM_S390_MEMOP_LOGICAL_WRITE	1
+#define KVM_S390_MEMOP_SIDA_READ	2
+#define KVM_S390_MEMOP_SIDA_WRITE	3
+#define KVM_S390_MEMOP_ABSOLUTE_READ	4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE	5
+#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG	6
+
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION	(1ULL << 2)
+
+/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
+#define KVM_S390_MEMOP_EXTENSION_CAP_BASE	(1 << 0)
+#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG	(1 << 1)
+
+struct kvm_s390_psw {
+	__u64 mask;
+	__u64 addr;
+};
+
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP		0xfffe0000u
+#define KVM_S390_PROGRAM_INT		0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX	0xfffe0002u
+#define KVM_S390_RESTART		0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT	0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE	0xfffe0005u
+#define KVM_S390_MCHK			0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP		0xffff1004u
+#define KVM_S390_INT_CPU_TIMER		0xffff1005u
+#define KVM_S390_INT_VIRTIO		0xffff2603u
+#define KVM_S390_INT_SERVICE		0xffff2401u
+#define KVM_S390_INT_EMERGENCY		0xffff1201u
+#define KVM_S390_INT_EXTERNAL_CALL	0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
+	(((schid)) |			       \
+	 ((ssid) << 16) |		       \
+	 ((cssid) << 18) |		       \
+	 ((ai) << 26))
+#define KVM_S390_INT_IO_MIN		0x00000000u
+#define KVM_S390_INT_IO_MAX		0xfffdffffu
+#define KVM_S390_INT_IO_AI_MASK		0x04000000u
+
+
+struct kvm_s390_interrupt {
+	__u32 type;
+	__u32 parm;
+	__u64 parm64;
+};
+
+struct kvm_s390_io_info {
+	__u16 subchannel_id;
+	__u16 subchannel_nr;
+	__u32 io_int_parm;
+	__u32 io_int_word;
+};
+
+struct kvm_s390_ext_info {
+	__u32 ext_params;
+	__u32 pad;
+	__u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {
+	__u64 trans_exc_code;
+	__u64 mon_code;
+	__u64 per_address;
+	__u32 data_exc_code;
+	__u16 code;
+	__u16 mon_class_nr;
+	__u8 per_code;
+	__u8 per_atmid;
+	__u8 exc_access_id;
+	__u8 per_access_id;
+	__u8 op_access_id;
+#define KVM_S390_PGM_FLAGS_ILC_VALID	0x01
+#define KVM_S390_PGM_FLAGS_ILC_0	0x02
+#define KVM_S390_PGM_FLAGS_ILC_1	0x04
+#define KVM_S390_PGM_FLAGS_ILC_MASK	0x06
+#define KVM_S390_PGM_FLAGS_NO_REWIND	0x08
+	__u8 flags;
+	__u8 pad[2];
+};
+
+struct kvm_s390_prefix_info {
+	__u32 address;
+};
+
+struct kvm_s390_extcall_info {
+	__u16 code;
+};
+
+struct kvm_s390_emerg_info {
+	__u16 code;
+};
+
+#define KVM_S390_STOP_FLAG_STORE_STATUS	0x01
+struct kvm_s390_stop_info {
+	__u32 flags;
+};
+
+struct kvm_s390_mchk_info {
+	__u64 cr14;
+	__u64 mcic;
+	__u64 failing_storage_address;
+	__u32 ext_damage_code;
+	__u32 pad;
+	__u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+	__u64 type;
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
+		struct kvm_s390_mchk_info mchk;
+		char reserved[64];
+	} u;
+};
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;        /* will stay unused for compatibility reasons */
+	__u32 len;
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
+};
+
+struct kvm_s390_ucas_mapping {
+	__u64 user_addr;
+	__u64 vcpu_addr;
+	__u64 length;
+};
+
+struct kvm_s390_pv_sec_parm {
+	__u64 origin;
+	__u64 length;
+};
+
+struct kvm_s390_pv_unp {
+	__u64 addr;
+	__u64 size;
+	__u64 tweak;
+};
+
+enum pv_cmd_dmp_id {
+	KVM_PV_DUMP_INIT,
+	KVM_PV_DUMP_CONFIG_STOR_STATE,
+	KVM_PV_DUMP_COMPLETE,
+	KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+	__u64 subcmd;
+	__u64 buff_addr;
+	__u64 buff_len;
+	__u64 gaddr;		/* For dump storage state */
+	__u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+	KVM_PV_INFO_VM,
+	KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+	__u64 dump_cpu_buffer_len;
+	__u64 dump_config_mem_buffer_per_1m;
+	__u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+	__u64 inst_calls_list[4];
+	__u64 max_cpus;
+	__u64 max_guests;
+	__u64 max_guest_addr;
+	__u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+	__u32 id;
+	__u32 len_max;
+	__u32 len_written;
+	__u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+	struct kvm_s390_pv_info_header header;
+	union {
+		struct kvm_s390_pv_info_dump dump;
+		struct kvm_s390_pv_info_vm vm;
+	};
+};
+
+enum pv_cmd_id {
+	KVM_PV_ENABLE,
+	KVM_PV_DISABLE,
+	KVM_PV_SET_SEC_PARMS,
+	KVM_PV_UNPACK,
+	KVM_PV_VERIFY,
+	KVM_PV_PREP_RESET,
+	KVM_PV_UNSHARE_ALL,
+	KVM_PV_INFO,
+	KVM_PV_DUMP,
+	KVM_PV_ASYNC_CLEANUP_PREPARE,
+	KVM_PV_ASYNC_CLEANUP_PERFORM,
+};
+
+struct kvm_pv_cmd {
+	__u32 cmd;	/* Command to be executed */
+	__u16 rc;	/* Ultravisor return code */
+	__u16 rrc;	/* Ultravisor return reason code */
+	__u64 data;	/* Data or address */
+	__u32 flags;    /* flags for future extensions. Must be 0 for now */
+	__u32 reserved[3];
+};
+
+struct kvm_s390_zpci_op {
+	/* in */
+	__u32 fh;               /* target device */
+	__u8  op;               /* operation to perform */
+	__u8  pad[3];
+	union {
+		/* for KVM_S390_ZPCIOP_REG_AEN */
+		struct {
+			__u64 ibv;      /* Guest addr of interrupt bit vector */
+			__u64 sb;       /* Guest addr of summary bit */
+			__u32 flags;
+			__u32 noi;      /* Number of interrupts */
+			__u8 isc;       /* Guest interrupt subclass */
+			__u8 sbo;       /* Offset of guest summary bit vector */
+			__u16 pad;
+		} reg_aen;
+		__u64 reserved[8];
+	} u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN                0
+#define KVM_S390_ZPCIOP_DEREG_AEN      1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS	1
@@ -74,6 +387,7 @@ struct kvm_s390_io_adapter_req {
 #define KVM_S390_VM_CRYPTO		2
 #define KVM_S390_VM_CPU_MODEL		3
 #define KVM_S390_VM_MIGRATION		4
+#define KVM_S390_VM_CPU_TOPOLOGY	5
 
 /* kvm attributes for mem_ctrl */
 #define KVM_S390_VM_MEM_ENABLE_CMMA	0
@@ -155,7 +469,24 @@ struct kvm_s390_vm_cpu_subfunc {
 	__u8 kdsa[16];		/* with MSA9 */
 	__u8 sortl[32];		/* with STFLE.150 */
 	__u8 dfltcc[32];	/* with STFLE.151 */
-	__u8 reserved[1728];
+	__u8 pfcr[16];		/* with STFLE.201 */
+	__u8 reserved[1712];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST	6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST	7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS	64
+struct kvm_s390_vm_cpu_uv_feat {
+	union {
+		struct {
+			__u64 : 4;
+			__u64 ap : 1;		/* bit 4 */
+			__u64 ap_intr : 1;	/* bit 5 */
+			__u64 : 58;
+		};
+		__u64 feat;
+	};
 };
 
 /* kvm attributes for crypto */
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 924b876f992c..ca42e941675d 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -2,7 +2,7 @@
 /*
  * Userspace interface to the pkey device driver
  *
- * Copyright IBM Corp. 2017, 2019
+ * Copyright IBM Corp. 2017, 2023
  *
  * Author: Harald Freudenberger <freude@de.ibm.com>
  *
@@ -26,33 +26,44 @@
 #define MAXCLRKEYSIZE	32	   /* a clear key value may be up to 32 bytes */
 #define MAXAESCIPHERKEYSIZE 136  /* our aes cipher keys have always 136 bytes */
 #define MINEP11AESKEYBLOBSIZE 256  /* min EP11 AES key blob size  */
-#define MAXEP11AESKEYBLOBSIZE 320  /* max EP11 AES key blob size */
+#define MAXEP11AESKEYBLOBSIZE 336  /* max EP11 AES key blob size */
 
 /* Minimum size of a key blob */
 #define MINKEYBLOBSIZE	SECKEYBLOBSIZE
 
 /* defines for the type field within the pkey_protkey struct */
-#define PKEY_KEYTYPE_AES_128		      1
-#define PKEY_KEYTYPE_AES_192		      2
-#define PKEY_KEYTYPE_AES_256		      3
-#define PKEY_KEYTYPE_ECC		      4
+#define PKEY_KEYTYPE_AES_128		1
+#define PKEY_KEYTYPE_AES_192		2
+#define PKEY_KEYTYPE_AES_256		3
+#define PKEY_KEYTYPE_ECC		4
+#define PKEY_KEYTYPE_ECC_P256		5
+#define PKEY_KEYTYPE_ECC_P384		6
+#define PKEY_KEYTYPE_ECC_P521		7
+#define PKEY_KEYTYPE_ECC_ED25519	8
+#define PKEY_KEYTYPE_ECC_ED448		9
+#define PKEY_KEYTYPE_AES_XTS_128	10
+#define PKEY_KEYTYPE_AES_XTS_256	11
+#define PKEY_KEYTYPE_HMAC_512		12
+#define PKEY_KEYTYPE_HMAC_1024		13
 
 /* the newer ioctls use a pkey_key_type enum for type information */
 enum pkey_key_type {
-	PKEY_TYPE_CCA_DATA   = (__u32) 1,
-	PKEY_TYPE_CCA_CIPHER = (__u32) 2,
-	PKEY_TYPE_EP11	     = (__u32) 3,
-	PKEY_TYPE_CCA_ECC    = (__u32) 0x1f,
-	PKEY_TYPE_EP11_AES   = (__u32) 6,
-	PKEY_TYPE_EP11_ECC   = (__u32) 7,
+	PKEY_TYPE_CCA_DATA   = (__u32)1,
+	PKEY_TYPE_CCA_CIPHER = (__u32)2,
+	PKEY_TYPE_EP11	     = (__u32)3,
+	PKEY_TYPE_CCA_ECC    = (__u32)0x1f,
+	PKEY_TYPE_EP11_AES   = (__u32)6,
+	PKEY_TYPE_EP11_ECC   = (__u32)7,
+	PKEY_TYPE_PROTKEY    = (__u32)8,
+	PKEY_TYPE_UVSECRET   = (__u32)9,
 };
 
 /* the newer ioctls use a pkey_key_size enum for key size information */
 enum pkey_key_size {
-	PKEY_SIZE_AES_128 = (__u32) 128,
-	PKEY_SIZE_AES_192 = (__u32) 192,
-	PKEY_SIZE_AES_256 = (__u32) 256,
-	PKEY_SIZE_UNKNOWN = (__u32) 0xFFFFFFFF,
+	PKEY_SIZE_AES_128 = (__u32)128,
+	PKEY_SIZE_AES_192 = (__u32)192,
+	PKEY_SIZE_AES_256 = (__u32)256,
+	PKEY_SIZE_UNKNOWN = (__u32)0xFFFFFFFF,
 };
 
 /* some of the newer ioctls use these flags */
@@ -115,6 +126,7 @@ struct pkey_genseck {
 	__u32 keytype;		    /* in: key type to generate		 */
 	struct pkey_seckey seckey;  /* out: the secure key blob		 */
 };
+
 #define PKEY_GENSECK _IOWR(PKEY_IOCTL_MAGIC, 0x01, struct pkey_genseck)
 
 /*
@@ -127,6 +139,7 @@ struct pkey_clr2seck {
 	struct pkey_clrkey clrkey;  /* in: the clear key value		 */
 	struct pkey_seckey seckey;  /* out: the secure key blob		 */
 };
+
 #define PKEY_CLR2SECK _IOWR(PKEY_IOCTL_MAGIC, 0x02, struct pkey_clr2seck)
 
 /*
@@ -138,6 +151,7 @@ struct pkey_sec2protk {
 	struct pkey_seckey seckey;   /* in: the secure key blob		  */
 	struct pkey_protkey protkey; /* out: the protected key		  */
 };
+
 #define PKEY_SEC2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x03, struct pkey_sec2protk)
 
 /*
@@ -148,6 +162,7 @@ struct pkey_clr2protk {
 	struct pkey_clrkey clrkey;   /* in: the clear key value		  */
 	struct pkey_protkey protkey; /* out: the protected key		  */
 };
+
 #define PKEY_CLR2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x04, struct pkey_clr2protk)
 
 /*
@@ -159,6 +174,7 @@ struct pkey_findcard {
 	__u16  cardnr;			       /* out: card number	  */
 	__u16  domain;			       /* out: domain number	  */
 };
+
 #define PKEY_FINDCARD _IOWR(PKEY_IOCTL_MAGIC, 0x05, struct pkey_findcard)
 
 /*
@@ -168,6 +184,7 @@ struct pkey_skey2pkey {
 	struct pkey_seckey seckey;   /* in: the secure key blob		  */
 	struct pkey_protkey protkey; /* out: the protected key		  */
 };
+
 #define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
 
 /*
@@ -185,6 +202,7 @@ struct pkey_verifykey {
 	__u16  keysize;			       /* out: key size in bits   */
 	__u32  attributes;		       /* out: attribute bits	  */
 };
+
 #define PKEY_VERIFYKEY _IOWR(PKEY_IOCTL_MAGIC, 0x07, struct pkey_verifykey)
 #define PKEY_VERIFY_ATTR_AES	   0x00000001  /* key is an AES key */
 #define PKEY_VERIFY_ATTR_OLD_MKVP  0x00000100  /* key has old MKVP value */
@@ -216,6 +234,7 @@ struct pkey_kblob2pkey {
 	__u32 keylen;			/* in: the key blob length */
 	struct pkey_protkey protkey;	/* out: the protected key  */
 };
+
 #define PKEY_KBLOB2PROTK _IOWR(PKEY_IOCTL_MAGIC, 0x0A, struct pkey_kblob2pkey)
 
 /*
@@ -248,6 +267,7 @@ struct pkey_genseck2 {
 	__u32 keylen;		    /* in: available key blob buffer size */
 				    /* out: actual key blob size	  */
 };
+
 #define PKEY_GENSECK2 _IOWR(PKEY_IOCTL_MAGIC, 0x11, struct pkey_genseck2)
 
 /*
@@ -282,6 +302,7 @@ struct pkey_clr2seck2 {
 	__u32 keylen;		    /* in: available key blob buffer size  */
 				    /* out: actual key blob size	   */
 };
+
 #define PKEY_CLR2SECK2 _IOWR(PKEY_IOCTL_MAGIC, 0x12, struct pkey_clr2seck2)
 
 /*
@@ -319,6 +340,7 @@ struct pkey_verifykey2 {
 	enum pkey_key_size size;    /* out: the key size		 */
 	__u32 flags;		    /* out: additional key info flags	 */
 };
+
 #define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2)
 
 /*
@@ -341,6 +363,7 @@ struct pkey_kblob2pkey2 {
 	__u32 apqn_entries;	     /* in: # of apqn target list entries  */
 	struct pkey_protkey protkey; /* out: the protected key		   */
 };
+
 #define PKEY_KBLOB2PROTK2 _IOWR(PKEY_IOCTL_MAGIC, 0x1A, struct pkey_kblob2pkey2)
 
 /*
@@ -348,7 +371,7 @@ struct pkey_kblob2pkey2 {
  * Is able to find out which type of secure key is given (CCA AES secure
  * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private
  * key) and tries to find all matching crypto cards based on the MKVP and maybe
- * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys
+ * other criteria (like CCA AES cipher keys need a CEX5C or higher, EP11 keys
  * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of
  * APQNs is further filtered by the key's mkvp which needs to match to either
  * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters
@@ -365,7 +388,7 @@ struct pkey_kblob2pkey2 {
  * is empty (apqn_entries is 0) the apqn_entries field is updated to the number
  * of apqn targets found and the ioctl returns with 0. If apqn_entries is > 0
  * but the number of apqn targets does not fit into the list, the apqn_targets
- * field is updatedd with the number of reqired entries but there are no apqn
+ * field is updated with the number of required entries but there are no apqn
  * values stored in the list and the ioctl returns with ENOSPC. If no matching
  * APQN is found, the ioctl returns with 0 but the apqn_entries value is 0.
  */
@@ -377,6 +400,7 @@ struct pkey_apqns4key {
 	__u32 apqn_entries;	   /* in: max # of apqn entries in the list   */
 				   /* out: # apqns stored into the list	      */
 };
+
 #define PKEY_APQNS4K _IOWR(PKEY_IOCTL_MAGIC, 0x1B, struct pkey_apqns4key)
 
 /*
@@ -403,7 +427,7 @@ struct pkey_apqns4key {
  * is empty (apqn_entries is 0) the apqn_entries field is updated to the number
  * of apqn targets found and the ioctl returns with 0. If apqn_entries is > 0
  * but the number of apqn targets does not fit into the list, the apqn_targets
- * field is updatedd with the number of reqired entries but there are no apqn
+ * field is updated with the number of required entries but there are no apqn
  * values stored in the list and the ioctl returns with ENOSPC. If no matching
  * APQN is found, the ioctl returns with 0 but the apqn_entries value is 0.
  */
@@ -416,6 +440,7 @@ struct pkey_apqns4keytype {
 	__u32 apqn_entries;	   /* in: max # of apqn entries in the list   */
 				   /* out: # apqns stored into the list	      */
 };
+
 #define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype)
 
 /*
@@ -442,6 +467,7 @@ struct pkey_kblob2pkey3 {
 	__u32 pkeylen;	 /* in/out: size of pkey buffer/actual len of pkey */
 	__u8 __user *pkey;		 /* in: pkey blob buffer space ptr */
 };
+
 #define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3)
 
 #endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index ad64d673b5e6..bb0826024bb9 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -8,6 +8,8 @@
 #ifndef _UAPI_S390_PTRACE_H
 #define _UAPI_S390_PTRACE_H
 
+#include <linux/const.h>
+
 /*
  * Offsets in the user_regs_struct. They are used for the ptrace
  * system call and in entry.S
@@ -166,6 +168,64 @@
 
 #endif /* __s390x__ */
 
+#ifndef __s390x__
+
+#define PSW_MASK_PER		_AC(0x40000000, UL)
+#define PSW_MASK_DAT		_AC(0x04000000, UL)
+#define PSW_MASK_IO		_AC(0x02000000, UL)
+#define PSW_MASK_EXT		_AC(0x01000000, UL)
+#define PSW_MASK_KEY		_AC(0x00F00000, UL)
+#define PSW_MASK_BASE		_AC(0x00080000, UL)	/* always one */
+#define PSW_MASK_MCHECK		_AC(0x00040000, UL)
+#define PSW_MASK_WAIT		_AC(0x00020000, UL)
+#define PSW_MASK_PSTATE		_AC(0x00010000, UL)
+#define PSW_MASK_ASC		_AC(0x0000C000, UL)
+#define PSW_MASK_CC		_AC(0x00003000, UL)
+#define PSW_MASK_PM		_AC(0x00000F00, UL)
+#define PSW_MASK_RI		_AC(0x00000000, UL)
+#define PSW_MASK_EA		_AC(0x00000000, UL)
+#define PSW_MASK_BA		_AC(0x00000000, UL)
+
+#define PSW_MASK_USER		_AC(0x0000FF00, UL)
+
+#define PSW_ADDR_AMODE		_AC(0x80000000, UL)
+#define PSW_ADDR_INSN		_AC(0x7FFFFFFF, UL)
+
+#define PSW_ASC_PRIMARY		_AC(0x00000000, UL)
+#define PSW_ASC_ACCREG		_AC(0x00004000, UL)
+#define PSW_ASC_SECONDARY	_AC(0x00008000, UL)
+#define PSW_ASC_HOME		_AC(0x0000C000, UL)
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER		_AC(0x4000000000000000, UL)
+#define PSW_MASK_DAT		_AC(0x0400000000000000, UL)
+#define PSW_MASK_IO		_AC(0x0200000000000000, UL)
+#define PSW_MASK_EXT		_AC(0x0100000000000000, UL)
+#define PSW_MASK_BASE		_AC(0x0000000000000000, UL)
+#define PSW_MASK_KEY		_AC(0x00F0000000000000, UL)
+#define PSW_MASK_MCHECK		_AC(0x0004000000000000, UL)
+#define PSW_MASK_WAIT		_AC(0x0002000000000000, UL)
+#define PSW_MASK_PSTATE		_AC(0x0001000000000000, UL)
+#define PSW_MASK_ASC		_AC(0x0000C00000000000, UL)
+#define PSW_MASK_CC		_AC(0x0000300000000000, UL)
+#define PSW_MASK_PM		_AC(0x00000F0000000000, UL)
+#define PSW_MASK_RI		_AC(0x0000008000000000, UL)
+#define PSW_MASK_EA		_AC(0x0000000100000000, UL)
+#define PSW_MASK_BA		_AC(0x0000000080000000, UL)
+
+#define PSW_MASK_USER		_AC(0x0000FF0180000000, UL)
+
+#define PSW_ADDR_AMODE		_AC(0x0000000000000000, UL)
+#define PSW_ADDR_INSN		_AC(0xFFFFFFFFFFFFFFFF, UL)
+
+#define PSW_ASC_PRIMARY		_AC(0x0000000000000000, UL)
+#define PSW_ASC_ACCREG		_AC(0x0000400000000000, UL)
+#define PSW_ASC_SECONDARY	_AC(0x0000800000000000, UL)
+#define PSW_ASC_HOME		_AC(0x0000C00000000000, UL)
+
+#endif /* __s390x__ */
+
 #define NUM_GPRS	16
 #define NUM_FPRS	16
 #define NUM_CRS		16
@@ -214,69 +274,6 @@ typedef struct {
 	unsigned long addr;
 } __attribute__ ((aligned(8))) psw_t;
 
-#ifndef __s390x__
-
-#define PSW_MASK_PER		0x40000000UL
-#define PSW_MASK_DAT		0x04000000UL
-#define PSW_MASK_IO		0x02000000UL
-#define PSW_MASK_EXT		0x01000000UL
-#define PSW_MASK_KEY		0x00F00000UL
-#define PSW_MASK_BASE		0x00080000UL	/* always one */
-#define PSW_MASK_MCHECK		0x00040000UL
-#define PSW_MASK_WAIT		0x00020000UL
-#define PSW_MASK_PSTATE		0x00010000UL
-#define PSW_MASK_ASC		0x0000C000UL
-#define PSW_MASK_CC		0x00003000UL
-#define PSW_MASK_PM		0x00000F00UL
-#define PSW_MASK_RI		0x00000000UL
-#define PSW_MASK_EA		0x00000000UL
-#define PSW_MASK_BA		0x00000000UL
-
-#define PSW_MASK_USER		0x0000FF00UL
-
-#define PSW_ADDR_AMODE		0x80000000UL
-#define PSW_ADDR_INSN		0x7FFFFFFFUL
-
-#define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 20)
-
-#define PSW_ASC_PRIMARY		0x00000000UL
-#define PSW_ASC_ACCREG		0x00004000UL
-#define PSW_ASC_SECONDARY	0x00008000UL
-#define PSW_ASC_HOME		0x0000C000UL
-
-#else /* __s390x__ */
-
-#define PSW_MASK_PER		0x4000000000000000UL
-#define PSW_MASK_DAT		0x0400000000000000UL
-#define PSW_MASK_IO		0x0200000000000000UL
-#define PSW_MASK_EXT		0x0100000000000000UL
-#define PSW_MASK_BASE		0x0000000000000000UL
-#define PSW_MASK_KEY		0x00F0000000000000UL
-#define PSW_MASK_MCHECK		0x0004000000000000UL
-#define PSW_MASK_WAIT		0x0002000000000000UL
-#define PSW_MASK_PSTATE		0x0001000000000000UL
-#define PSW_MASK_ASC		0x0000C00000000000UL
-#define PSW_MASK_CC		0x0000300000000000UL
-#define PSW_MASK_PM		0x00000F0000000000UL
-#define PSW_MASK_RI		0x0000008000000000UL
-#define PSW_MASK_EA		0x0000000100000000UL
-#define PSW_MASK_BA		0x0000000080000000UL
-
-#define PSW_MASK_USER		0x0000FF0180000000UL
-
-#define PSW_ADDR_AMODE		0x0000000000000000UL
-#define PSW_ADDR_INSN		0xFFFFFFFFFFFFFFFFUL
-
-#define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 52)
-
-#define PSW_ASC_PRIMARY		0x0000000000000000UL
-#define PSW_ASC_ACCREG		0x0000400000000000UL
-#define PSW_ASC_SECONDARY	0x0000800000000000UL
-#define PSW_ASC_HOME		0x0000C00000000000UL
-
-#endif /* __s390x__ */
-
-
 /*
  * The s390_regs structure is used to define the elf_gregset_t.
  */
diff --git a/arch/s390/include/uapi/asm/raw3270.h b/arch/s390/include/uapi/asm/raw3270.h
new file mode 100644
index 000000000000..6676f102bd50
--- /dev/null
+++ b/arch/s390/include/uapi/asm/raw3270.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_UAPI_RAW3270_H
+#define __ASM_S390_UAPI_RAW3270_H
+
+/* Local Channel Commands */
+#define TC_WRITE	0x01		/* Write */
+#define TC_RDBUF	0x02		/* Read Buffer */
+#define TC_EWRITE	0x05		/* Erase write */
+#define TC_READMOD	0x06		/* Read modified */
+#define TC_EWRITEA	0x0d		/* Erase write alternate */
+#define TC_WRITESF	0x11		/* Write structured field */
+
+/* Buffer Control Orders */
+#define TO_GE		0x08		/* Graphics Escape */
+#define TO_SF		0x1d		/* Start field */
+#define TO_SBA		0x11		/* Set buffer address */
+#define TO_IC		0x13		/* Insert cursor */
+#define TO_PT		0x05		/* Program tab */
+#define TO_RA		0x3c		/* Repeat to address */
+#define TO_SFE		0x29		/* Start field extended */
+#define TO_EUA		0x12		/* Erase unprotected to address */
+#define TO_MF		0x2c		/* Modify field */
+#define TO_SA		0x28		/* Set attribute */
+
+/* Field Attribute Bytes */
+#define TF_INPUT	0x40		/* Visible input */
+#define TF_INPUTN	0x4c		/* Invisible input */
+#define TF_INMDT	0xc1		/* Visible, Set-MDT */
+#define TF_LOG		0x60
+
+/* Character Attribute Bytes */
+#define TAT_RESET	0x00
+#define TAT_FIELD	0xc0
+#define TAT_EXTHI	0x41
+#define TAT_FGCOLOR	0x42
+#define TAT_CHARS	0x43
+#define TAT_BGCOLOR	0x45
+#define TAT_TRANS	0x46
+
+/* Extended-Highlighting Bytes */
+#define TAX_RESET	0x00
+#define TAX_BLINK	0xf1
+#define TAX_REVER	0xf2
+#define TAX_UNDER	0xf4
+
+/* Reset value */
+#define TAR_RESET	0x00
+
+/* Color values */
+#define TAC_RESET	0x00
+#define TAC_BLUE	0xf1
+#define TAC_RED		0xf2
+#define TAC_PINK	0xf3
+#define TAC_GREEN	0xf4
+#define TAC_TURQ	0xf5
+#define TAC_YELLOW	0xf6
+#define TAC_WHITE	0xf7
+#define TAC_DEFAULT	0x00
+
+/* Write Control Characters */
+#define TW_NONE		0x40		/* No particular action */
+#define TW_KR		0xc2		/* Keyboard restore */
+#define TW_PLUSALARM	0x04		/* Add this bit for alarm */
+
+#define RAW3270_FIRSTMINOR	1	/* First minor number */
+#define RAW3270_MAXDEVS		255	/* Max number of 3270 devices */
+
+#define AID_CLEAR		0x6d
+#define AID_ENTER		0x7d
+#define AID_PF3			0xf3
+#define AID_PF7			0xf7
+#define AID_PF8			0xf8
+#define AID_READ_PARTITION	0x88
+
+#endif /* __ASM_S390_UAPI_RAW3270_H */
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
index 72604f7792c3..f85b50723dd3 100644
--- a/arch/s390/include/uapi/asm/statfs.h
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -30,7 +30,7 @@ struct statfs {
 	unsigned int	f_namelen;
 	unsigned int	f_frsize;
 	unsigned int	f_flags;
-	unsigned int	f_spare[4];
+	unsigned int	f_spare[5];
 };
 
 struct statfs64 {
@@ -45,7 +45,7 @@ struct statfs64 {
 	unsigned int	f_namelen;
 	unsigned int	f_frsize;
 	unsigned int	f_flags;
-	unsigned int	f_spare[4];
+	unsigned int	f_spare[5];
 };
 
 #endif
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
deleted file mode 100644
index 54223169c806..000000000000
--- a/arch/s390/include/uapi/asm/termios.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *  S390 version
- *
- *  Derived from "include/asm-i386/termios.h"
- */
-
-#ifndef _UAPI_S390_TERMIOS_H
-#define _UAPI_S390_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
-	unsigned short ws_row;
-	unsigned short ws_col;
-	unsigned short ws_xpixel;
-	unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
-	unsigned short c_iflag;		/* input mode flags */
-	unsigned short c_oflag;		/* output mode flags */
-	unsigned short c_cflag;		/* control mode flags */
-	unsigned short c_lflag;		/* local mode flags */
-	unsigned char c_line;		/* line discipline */
-	unsigned char c_cc[NCC];	/* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE	0x001
-#define TIOCM_DTR	0x002
-#define TIOCM_RTS	0x004
-#define TIOCM_ST	0x008
-#define TIOCM_SR	0x010
-#define TIOCM_CTS	0x020
-#define TIOCM_CAR	0x040
-#define TIOCM_RNG	0x080
-#define TIOCM_DSR	0x100
-#define TIOCM_CD	TIOCM_CAR
-#define TIOCM_RI	TIOCM_RNG
-#define TIOCM_OUT1	0x2000
-#define TIOCM_OUT2	0x4000
-#define TIOCM_LOOP	0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-
-#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index da034c606314..84457dbb26b4 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -12,15 +12,18 @@
 
 #ifndef __ASSEMBLY__
 
-/* A address type so that arithmetic can be done on it & it can be upgraded to
-   64 bit when necessary 
-*/
-typedef unsigned long addr_t; 
+typedef unsigned long addr_t;
 typedef __signed__ long saddr_t;
 
 typedef struct {
-	__u32 u[4];
-} __vector128;
+	union {
+		struct {
+			__u64 high;
+			__u64 low;
+		};
+		__u32 u[4];
+	};
+} __attribute__((packed, aligned(4))) __vector128;
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h
index 10a5ac918e02..4947f26ad9fb 100644
--- a/arch/s390/include/uapi/asm/uvdevice.h
+++ b/arch/s390/include/uapi/asm/uvdevice.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
- *  Copyright IBM Corp. 2022
+ *  Copyright IBM Corp. 2022, 2024
  *  Author(s): Steffen Eiden <seiden@linux.ibm.com>
  */
 #ifndef __S390_ASM_UVDEVICE_H
@@ -32,6 +32,33 @@ struct uvio_attest {
 	__u16 reserved136;				/* 0x0136 */
 };
 
+/**
+ * struct uvio_uvdev_info - Information about supported functions
+ * @supp_uvio_cmds: IOCTLs supported by this device
+ * @supp_uv_cmds: supported UVCs corresponding to the IOCTLs
+ *
+ * UVIO request to get information about supported request types by this
+ * uvdevice and the Ultravisor.  Everything is output. Bits are in LSB0
+ * ordering.  If the bit is set in both, @supp_uvio_cmds and @supp_uv_cmds, the
+ * uvdevice and the Ultravisor support that call.
+ *
+ * Note that bit 0 (UVIO_IOCTL_UVDEV_INFO_NR) is always zero for `supp_uv_cmds`
+ * as there is no corresponding UV-call.
+ */
+struct uvio_uvdev_info {
+	/*
+	 * If bit `n` is set, this device supports the IOCTL with nr `n`.
+	 */
+	__u64 supp_uvio_cmds;
+	/*
+	 * If bit `n` is set, the Ultravisor(UV) supports the UV-call
+	 * corresponding to the IOCTL with nr `n` in the calling context (host
+	 * or guest).  The value is only valid if the corresponding bit in
+	 * @supp_uvio_cmds is set as well.
+	 */
+	__u64 supp_uv_cmds;
+};
+
 /*
  * The following max values define an upper length for the IOCTL in/out buffers.
  * However, they do not represent the maximum the Ultravisor allows which is
@@ -42,10 +69,38 @@ struct uvio_attest {
 #define UVIO_ATT_ARCB_MAX_LEN		0x100000
 #define UVIO_ATT_MEASUREMENT_MAX_LEN	0x8000
 #define UVIO_ATT_ADDITIONAL_MAX_LEN	0x8000
+#define UVIO_ADD_SECRET_MAX_LEN		0x100000
+#define UVIO_LIST_SECRETS_LEN		0x1000
+#define UVIO_RETR_SECRET_MAX_LEN	0x2000
 
 #define UVIO_DEVICE_NAME "uv"
 #define UVIO_TYPE_UVC 'u'
 
-#define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb)
+enum UVIO_IOCTL_NR {
+	UVIO_IOCTL_UVDEV_INFO_NR = 0x00,
+	UVIO_IOCTL_ATT_NR,
+	UVIO_IOCTL_ADD_SECRET_NR,
+	UVIO_IOCTL_LIST_SECRETS_NR,
+	UVIO_IOCTL_LOCK_SECRETS_NR,
+	UVIO_IOCTL_RETR_SECRET_NR,
+	/* must be the last entry */
+	UVIO_IOCTL_NUM_IOCTLS
+};
+
+#define UVIO_IOCTL(nr)			_IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb)
+#define UVIO_IOCTL_UVDEV_INFO		UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR)
+#define UVIO_IOCTL_ATT			UVIO_IOCTL(UVIO_IOCTL_ATT_NR)
+#define UVIO_IOCTL_ADD_SECRET		UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_IOCTL_LIST_SECRETS		UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_IOCTL_LOCK_SECRETS		UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR)
+#define UVIO_IOCTL_RETR_SECRET		UVIO_IOCTL(UVIO_IOCTL_RETR_SECRET_NR)
+
+#define UVIO_SUPP_CALL(nr)		(1ULL << (nr))	/* bit `nr` of supp_uvio_cmds/supp_uv_cmds */
+#define UVIO_SUPP_UDEV_INFO		UVIO_SUPP_CALL(UVIO_IOCTL_UVDEV_INFO_NR)
+#define UVIO_SUPP_ATT			UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR)
+#define UVIO_SUPP_ADD_SECRET		UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR)
+#define UVIO_SUPP_LIST_SECRETS		UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR)
+#define UVIO_SUPP_LOCK_SECRETS		UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR)
+#define UVIO_SUPP_RETR_SECRET		UVIO_SUPP_CALL(UVIO_IOCTL_RETR_SECRET_NR)
 
 #endif /* __S390_ASM_UVDEVICE_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index d83713f67530..f4785abe1b9f 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -85,7 +85,8 @@ struct ica_rsa_modexpo_crt {
 struct CPRBX {
 	__u16	     cprb_len;		/* CPRB length	      220	 */
 	__u8	     cprb_ver_id;	/* CPRB version id.   0x02	 */
-	__u8	     _pad_000[3];	/* Alignment pad bytes		 */
+	__u8	     ctfm;		/* Command Type Filtering Mask	 */
+	__u8	     pad_000[2];	/* Alignment pad bytes		 */
 	__u8	     func_id[2];	/* function id	      0x5432	 */
 	__u8	     cprb_flags[4];	/* Flags			 */
 	__u32	     req_parml;		/* request parameter buffer len	 */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 27d6b3c7aa06..ea5ed6654050 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -10,6 +10,9 @@ CFLAGS_REMOVE_ftrace.o		= $(CC_FLAGS_FTRACE)
 
 # Do not trace early setup code
 CFLAGS_REMOVE_early.o		= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook.o		= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_stacktrace.o	= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind_bc.o	= $(CC_FLAGS_FTRACE)
 
 endif
 
@@ -33,22 +36,24 @@ CFLAGS_stacktrace.o	+= -fno-optimize-sibling-calls
 CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
 CFLAGS_unwind_bc.o	+= -fno-optimize-sibling-calls
 
-obj-y	:= traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y	:= head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o
-obj-y	+= sysinfo.o lgr.o os_info.o machine_kexec.o
+obj-y	+= debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
+obj-y	+= sysinfo.o lgr.o os_info.o ctlreg.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y	+= entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y	+= entry.o reipl.o kdebugfs.o alternative.o
 obj-y	+= nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y	+= smp.o text_amode31.o stacktrace.o
+obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
+obj-y	+= diag/
 
-extra-y				+= head64.o vmlinux.lds
+always-$(KBUILD_BUILTIN)	+= vmlinux.lds
 
 obj-$(CONFIG_SYSFS)		+= nospec-sysfs.o
 CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
 
+obj-$(CONFIG_SYSFS)		+= cpacf.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o hiperdispatch.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
@@ -56,26 +61,27 @@ obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o
 obj-$(CONFIG_COMPAT)		+= $(compat-obj-y)
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_KPROBES)		+= kprobes_insn_page.o
 obj-$(CONFIG_KPROBES)		+= mcount.o
+obj-$(CONFIG_RETHOOK)		+= rethook.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_VMCORE_INFO)	+= vmcore_info.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 
 obj-$(CONFIG_KEXEC_FILE)	+= machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_KEXEC_FILE)	+= kexec_elf.o
-
+obj-$(CONFIG_CERT_STORE)	+= cert_store.o
 obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
 
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf_common.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf.o perf_cpum_sf.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS)	+= perf_pai_crypto.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_pai_crypto.o perf_pai_ext.o
 
 obj-$(CONFIG_TRACEPOINTS)	+= trace.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
 
 # vdso
 obj-y				+= vdso64/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..6252b7d115dd
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+	unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	unsigned long phys = __pa(lc);
+	int rc, i;
+
+	for (i = 0; i < LC_PAGES; i++) {
+		rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+		if (rc) {
+			/*
+			 * Do not unmap allocated page tables in case the
+			 * allocation was not requested. In such a case the
+			 * request is expected coming from an atomic context,
+			 * while the unmap attempt might sleep.
+			 */
+			if (alloc) {
+				for (--i; i >= 0; i--) {
+					addr -= PAGE_SIZE;
+					vmem_unmap_4k_page(addr);
+				}
+			}
+			return rc;
+		}
+		addr += PAGE_SIZE;
+		phys += PAGE_SIZE;
+	}
+	return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+	unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	int i;
+
+	for (i = 0; i < LC_PAGES; i++) {
+		vmem_unmap_4k_page(addr);
+		addr += PAGE_SIZE;
+	}
+}
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index e7bca29f9c34..90c0e6408992 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,75 +1,90 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/text-patching.h>
+
+#ifndef pr_fmt
+#define pr_fmt(fmt)	"alt: " fmt
+#endif
+
+#include <linux/uaccess.h>
+#include <linux/printk.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
 #include <asm/alternative.h>
 #include <asm/facility.h>
-#include <asm/nospec-branch.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+
+#ifndef a_debug
+#define a_debug		pr_debug
+#endif
+
+#ifndef __kernel_va
+#define __kernel_va(x)	(void *)(x)
+#endif
+
+unsigned long __bootdata_preserved(machine_features[1]);
+
+struct alt_debug {
+	unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG];
+	unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG];
+	int spec;
+};
 
-static int __initdata_or_module alt_instr_disabled;
+static struct alt_debug __bootdata_preserved(alt_debug);
 
-static int __init disable_alternative_instructions(char *str)
+static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data)
 {
-	alt_instr_disabled = 1;
-	return 0;
-}
+	char oinsn[33], ninsn[33];
+	unsigned long kptr;
+	unsigned int pos;
 
-early_param("noaltinstr", disable_alternative_instructions);
+	for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++)
+		hex_byte_pack(&oinsn[2 * pos], old[pos]);
+	oinsn[2 * pos] = 0;
+	for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++)
+		hex_byte_pack(&ninsn[2 * pos], new[pos]);
+	ninsn[2 * pos] = 0;
+	kptr = (unsigned long)__kernel_va(old);
+	a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn);
+}
 
-static void __init_or_module __apply_alternatives(struct alt_instr *start,
-						  struct alt_instr *end)
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
 {
+	struct alt_debug *d;
 	struct alt_instr *a;
-	u8 *instr, *replacement;
+	bool debug, replace;
+	u8 *old, *new;
 
 	/*
 	 * The scan order should be from start to end. A later scanned
 	 * alternative code can overwrite previously scanned alternative code.
 	 */
+	d = &alt_debug;
 	for (a = start; a < end; a++) {
-		instr = (u8 *)&a->instr_offset + a->instr_offset;
-		replacement = (u8 *)&a->repl_offset + a->repl_offset;
-
-		if (!__test_facility(a->facility, alt_stfle_fac_list))
-			continue;
-
-		if (unlikely(a->instrlen % 2)) {
-			WARN_ONCE(1, "cpu alternatives instructions length is "
-				     "odd, skipping patching\n");
+		if (!(a->ctx & ctx))
 			continue;
+		switch (a->type) {
+		case ALT_TYPE_FACILITY:
+			replace = test_facility(a->data);
+			debug = __test_facility(a->data, d->facilities);
+			break;
+		case ALT_TYPE_FEATURE:
+			replace = test_machine_feature(a->data);
+			debug = __test_machine_feature(a->data, d->mfeatures);
+			break;
+		case ALT_TYPE_SPEC:
+			replace = nobp_enabled();
+			debug = d->spec;
+			break;
+		default:
+			replace = false;
+			debug = false;
 		}
-
-		s390_kernel_write(instr, replacement, a->instrlen);
+		if (!replace)
+			continue;
+		old = (u8 *)&a->instr_offset + a->instr_offset;
+		new = (u8 *)&a->repl_offset + a->repl_offset;
+		if (debug)
+			alternative_dump(old, new, a->instrlen, a->type, a->data);
+		s390_kernel_write(old, new, a->instrlen);
 	}
 }
-
-void __init_or_module apply_alternatives(struct alt_instr *start,
-					 struct alt_instr *end)
-{
-	if (!alt_instr_disabled)
-		__apply_alternatives(start, end);
-}
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-void __init apply_alternative_instructions(void)
-{
-	apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static void do_sync_core(void *info)
-{
-	sync_core();
-}
-
-void text_poke_sync(void)
-{
-	on_each_cpu(do_sync_core, NULL, 1);
-}
-
-void text_poke_sync_lock(void)
-{
-	cpus_read_lock();
-	text_poke_sync();
-	cpus_read_unlock();
-}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index d8ce965c0a97..95ecad9c7d7d 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -5,16 +5,14 @@
  * and format the required data.
  */
 
-#define ASM_OFFSETS_C
-
 #include <linux/kbuild.h>
-#include <linux/kvm_host.h>
 #include <linux/sched.h>
 #include <linux/purgatory.h>
 #include <linux/pgtable.h>
-#include <asm/idle.h>
-#include <asm/gmap.h>
+#include <linux/ftrace_regs.h>
+#include <asm/kvm_host_types.h>
 #include <asm/stacktrace.h>
+#include <asm/ptrace.h>
 
 int main(void)
 {
@@ -28,6 +26,7 @@ int main(void)
 	BLANK();
 	/* thread info offsets */
 	OFFSET(__TI_flags, task_struct, thread_info.flags);
+	OFFSET(__TI_sie, task_struct, thread_info.sie);
 	BLANK();
 	/* pt_regs offsets */
 	OFFSET(__PT_PSW, pt_regs, psw);
@@ -49,8 +48,8 @@ int main(void)
 	OFFSET(__PT_R14, pt_regs, gprs[14]);
 	OFFSET(__PT_R15, pt_regs, gprs[15]);
 	OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+	OFFSET(__PT_INT_CODE, pt_regs, int_code);
 	OFFSET(__PT_FLAGS, pt_regs, flags);
-	OFFSET(__PT_CR1, pt_regs, cr1);
 	OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
 	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
@@ -62,19 +61,22 @@ int main(void)
 	OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
 	OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
 	OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
+	OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+	OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
 	DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
 	BLANK();
-	/* idle data offsets */
-	OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
-	OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
-	OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
+	OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
+	DEFINE(STACK_FRAME_USER_OVERHEAD, sizeof(struct stack_frame_user));
+	OFFSET(__SFVDSO_RETURN_ADDRESS, stack_frame_vdso_wrapper, return_address);
+	DEFINE(STACK_FRAME_VDSO_OVERHEAD, sizeof(struct stack_frame_vdso_wrapper));
 	BLANK();
 	/* hardware defined lowcore locations 0x000 - 0x1ff */
 	OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
 	OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
 	OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
 	OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
-	OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+	OFFSET(__LC_PGM_CODE, lowcore, pgm_code);
+	OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code);
 	OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
 	OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
 	OFFSET(__LC_PER_CODE, lowcore, per_code);
@@ -109,10 +111,9 @@ int main(void)
 	OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
 	OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
 	/* software defined lowcore locations 0x200 - 0xdff*/
-	OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
-	OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+	OFFSET(__LC_SAVE_AREA, lowcore, save_area);
 	OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
-	OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+	OFFSET(__LC_PCPU, lowcore, pcpu);
 	OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
 	OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
 	OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@@ -121,8 +122,6 @@ int main(void)
 	OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
 	OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
 	OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
-	OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
-	OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
 	OFFSET(__LC_CURRENT, lowcore, current_task);
 	OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
 	OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
@@ -137,7 +136,6 @@ int main(void)
 	OFFSET(__LC_USER_ASCE, lowcore, user_asce);
 	OFFSET(__LC_LPP, lowcore, lpp);
 	OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
-	OFFSET(__LC_GMAP, lowcore, gmap);
 	OFFSET(__LC_LAST_BREAK, lowcore, last_break);
 	/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
 	OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
@@ -160,7 +158,6 @@ int main(void)
 	OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
 	BLANK();
 	/* gmap/sie offsets */
-	OFFSET(__GMAP_ASCE, gmap, asce);
 	OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
 	OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
 	/* kexec_sha_region */
@@ -177,5 +174,9 @@ int main(void)
 	DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
 	DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
 	DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+	OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
+	DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
+
+	OFFSET(__PCPU_FLAGS, pcpu, flags);
 	return 0;
 }
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 7ee3651d00ab..4f2669030220 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -46,7 +46,7 @@ struct cache_info {
 #define CACHE_MAX_LEVEL 8
 union cache_topology {
 	struct cache_info ci[CACHE_MAX_LEVEL];
-	unsigned long long raw;
+	unsigned long raw;
 };
 
 static const char * const cache_type_string[] = {
@@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu)
 			ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
 		}
 	}
+	this_cpu_ci->cpu_map_populated = true;
 	return 0;
 }
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
new file mode 100644
index 000000000000..c217a5e64094
--- /dev/null
+++ b/arch/s390/kernel/cert_store.c
@@ -0,0 +1,813 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DIAG 0x320 support and certificate store handling
+ *
+ * Copyright IBM Corp. 2023
+ * Author(s):	Anastasia Eskova <anastasia.eskova@ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key-type.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+#include <crypto/sha2.h>
+#include <keys/user-type.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+
+#define DIAG_MAX_RETRIES		10
+
+#define VCE_FLAGS_VALID_MASK		0x80
+
+#define ISM_LEN_DWORDS			4
+#define VCSSB_LEN_BYTES			128
+#define VCSSB_LEN_NO_CERTS		4
+#define VCB_LEN_NO_CERTS		64
+#define VC_NAME_LEN_BYTES		64
+
+#define CERT_STORE_KEY_TYPE_NAME	"cert_store_key"
+#define CERT_STORE_KEYRING_NAME		"cert_store"
+
+static debug_info_t *cert_store_dbf;
+static debug_info_t *cert_store_hexdump;
+
+#define pr_dbf_msg(fmt, ...) \
+	debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__)
+
+enum diag320_subcode {
+	DIAG320_SUBCODES	= 0,
+	DIAG320_STORAGE		= 1,
+	DIAG320_CERT_BLOCK	= 2,
+};
+
+enum diag320_rc {
+	DIAG320_RC_OK		= 0x0001,
+	DIAG320_RC_CS_NOMATCH	= 0x0306,
+};
+
+/* Verification Certificates Store Support Block (VCSSB). */
+struct vcssb {
+	u32 vcssb_length;
+	u8  pad_0x04[3];
+	u8  version;
+	u8  pad_0x08[8];
+	u32 cs_token;
+	u8  pad_0x14[12];
+	u16 total_vc_index_count;
+	u16 max_vc_index_count;
+	u8  pad_0x24[28];
+	u32 max_vce_length;
+	u32 max_vcxe_length;
+	u8  pad_0x48[8];
+	u32 max_single_vcb_length;
+	u32 total_vcb_length;
+	u32 max_single_vcxb_length;
+	u32 total_vcxb_length;
+	u8  pad_0x60[32];
+} __packed __aligned(8);
+
+/* Verification Certificate Entry (VCE) Header. */
+struct vce_header {
+	u32 vce_length;
+	u8  flags;
+	u8  key_type;
+	u16 vc_index;
+	u8  vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */
+	u8  vc_format;
+	u8  pad_0x49;
+	u16 key_id_length;
+	u8  pad_0x4c;
+	u8  vc_hash_type;
+	u16 vc_hash_length;
+	u8  pad_0x50[4];
+	u32 vc_length;
+	u8  pad_0x58[8];
+	u16 vc_hash_offset;
+	u16 vc_offset;
+	u8  pad_0x64[28];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB) Header. */
+struct vcb_header {
+	u32 vcb_input_length;
+	u8  pad_0x04[4];
+	u16 first_vc_index;
+	u16 last_vc_index;
+	u32 pad_0x0c;
+	u32 cs_token;
+	u8  pad_0x14[12];
+	u32 vcb_output_length;
+	u8  pad_0x24[3];
+	u8  version;
+	u16 stored_vc_count;
+	u16 remaining_vc_count;
+	u8  pad_0x2c[20];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB). */
+struct vcb {
+	struct vcb_header vcb_hdr;
+	u8 vcb_buf[];
+} __packed __aligned(4);
+
+/* Verification Certificate Entry (VCE). */
+struct vce {
+	struct vce_header vce_hdr;
+	u8 cert_data_buf[];
+} __packed __aligned(4);
+
+static void cert_store_key_describe(const struct key *key, struct seq_file *m)
+{
+	char ascii[VC_NAME_LEN_BYTES + 1];
+
+	/*
+	 * First 64 bytes of the key description is key name in EBCDIC CP 500.
+	 * Convert it to ASCII for displaying in /proc/keys.
+	 */
+	strscpy(ascii, key->description);
+	EBCASC_500(ascii, VC_NAME_LEN_BYTES);
+	seq_puts(m, ascii);
+
+	seq_puts(m, &key->description[VC_NAME_LEN_BYTES]);
+	if (key_is_positive(key))
+		seq_printf(m, ": %u", key->datalen);
+}
+
+/*
+ * Certificate store key type takes over properties of
+ * user key but cannot be updated.
+ */
+static struct key_type key_type_cert_store_key = {
+	.name		= CERT_STORE_KEY_TYPE_NAME,
+	.preparse	= user_preparse,
+	.free_preparse	= user_free_preparse,
+	.instantiate	= generic_key_instantiate,
+	.revoke		= user_revoke,
+	.destroy	= user_destroy,
+	.describe	= cert_store_key_describe,
+	.read		= user_read,
+};
+
+/* Logging functions. */
+static void pr_dbf_vcb(const struct vcb *b)
+{
+	pr_dbf_msg("VCB Header:");
+	pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length);
+	pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index);
+	pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index);
+	pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token);
+	pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length);
+	pr_dbf_msg("version: %d", b->vcb_hdr.version);
+	pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count);
+	pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count);
+}
+
+static void pr_dbf_vce(const struct vce *e)
+{
+	unsigned char vc_name[VC_NAME_LEN_BYTES + 1];
+	char log_string[VC_NAME_LEN_BYTES + 40];
+
+	pr_dbf_msg("VCE Header:");
+	pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length);
+	pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags);
+	pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type);
+	pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index);
+	pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format);
+	pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length);
+	pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type);
+	pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length);
+	pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset);
+	pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length);
+	pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset);
+
+	/* Certificate name in ASCII. */
+	memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES);
+	EBCASC_500(vc_name, VC_NAME_LEN_BYTES);
+	vc_name[VC_NAME_LEN_BYTES] = '\0';
+
+	snprintf(log_string, sizeof(log_string),
+		 "index: %d vce_hdr.vc_name (ASCII): %s",
+		 e->vce_hdr.vc_index, vc_name);
+	debug_text_event(cert_store_hexdump, 3, log_string);
+
+	/* Certificate data: first 128 bytes, then the last 128 bytes of the VCE. */
+	debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start");
+	debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128);
+	debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end");
+	debug_event(cert_store_hexdump, 3,
+		    (u8 *)e + e->vce_hdr.vce_length - 128, 128);
+}
+
+static void pr_dbf_vcssb(const struct vcssb *s)
+{
+	debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1");
+	debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES);
+
+	pr_dbf_msg("VCSSB:");
+	pr_dbf_msg("vcssb_length: %u", s->vcssb_length);
+	pr_dbf_msg("version: %u", s->version);
+	pr_dbf_msg("cs_token: %u", s->cs_token);
+	pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count);
+	pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count);
+	pr_dbf_msg("max_vce_length: %u", s->max_vce_length);
+	pr_dbf_msg("max_vcxe_length: %u", s->max_vcxe_length);
+	pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length);
+	pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length);
+	pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length);
+	pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length);
+}
+
+static int __diag320(unsigned long subcode, void *addr)
+{
+	union register_pair rp = { .even = (unsigned long)addr, };
+
+	asm_inline volatile(
+		"	diag	%[rp],%[subcode],0x320\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		: [rp] "+d" (rp.pair)
+		: [subcode] "d" (subcode)
+		: "cc", "memory");
+
+	return rp.odd;
+}
+
+static int diag320(unsigned long subcode, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X320);
+
+	return __diag320(subcode, addr);
+}
+
+/*
+ * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in
+ * VCE. Return -EINVAL if hashes don't match.
+ */
+static int check_certificate_hash(const struct vce *vce)
+{
+	u8 hash[SHA256_DIGEST_SIZE];
+	u16 vc_hash_length;
+	u8 *vce_hash;
+
+	vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset;
+	vc_hash_length = vce->vce_hdr.vc_hash_length;
+	sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash);
+	if (memcmp(vce_hash, hash, vc_hash_length) == 0)
+		return 0;
+
+	pr_dbf_msg("SHA256 hash of received certificate does not match");
+	debug_text_event(cert_store_hexdump, 3, "VCE hash:");
+	debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE);
+	debug_text_event(cert_store_hexdump, 3, "Calculated hash:");
+	debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE);
+
+	return -EINVAL;
+}
+
+static int check_certificate_valid(const struct vce *vce)
+{
+	if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) {
+		pr_dbf_msg("Certificate entry is invalid");
+		return -EINVAL;
+	}
+	if (vce->vce_hdr.vc_format != 1) {
+		pr_dbf_msg("Certificate format is not supported");
+		return -EINVAL;
+	}
+	if (vce->vce_hdr.vc_hash_type != 1) {
+		pr_dbf_msg("Hash type is not supported");
+		return -EINVAL;
+	}
+
+	return check_certificate_hash(vce);
+}
+
+static struct key *get_user_session_keyring(void)
+{
+	key_ref_t us_keyring_ref;
+
+	us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING,
+					 KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+	if (IS_ERR(us_keyring_ref)) {
+		pr_dbf_msg("Couldn't get user session keyring: %ld",
+			   PTR_ERR(us_keyring_ref));
+		return ERR_PTR(-ENOKEY);
+	}
+	key_ref_put(us_keyring_ref);
+	return key_ref_to_ptr(us_keyring_ref);
+}
+
+/* Invalidate all keys from cert_store keyring. */
+static int invalidate_keyring_keys(struct key *keyring)
+{
+	unsigned long num_keys, key_index;
+	size_t keyring_payload_len;
+	key_serial_t *key_array;
+	struct key *current_key;
+	int rc;
+
+	keyring_payload_len = key_type_keyring.read(keyring, NULL, 0);
+	num_keys = keyring_payload_len / sizeof(key_serial_t);
+	key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL);
+	if (!key_array)
+		return -ENOMEM;
+
+	rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len);
+	if (rc != keyring_payload_len) {
+		pr_dbf_msg("Couldn't read keyring payload");
+		goto out;
+	}
+
+	for (key_index = 0; key_index < num_keys; key_index++) {
+		current_key = key_lookup(key_array[key_index]);
+		pr_dbf_msg("Invalidating key %08x", current_key->serial);
+
+		key_invalidate(current_key);
+		key_put(current_key);
+		rc = key_unlink(keyring, current_key);
+		if (rc) {
+			pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc);
+			break;
+		}
+	}
+out:
+	kfree(key_array);
+	return rc;
+}
+
+static struct key *find_cs_keyring(void)
+{
+	key_ref_t cs_keyring_ref;
+	struct key *cs_keyring;
+
+	cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true),
+					&key_type_keyring, CERT_STORE_KEYRING_NAME,
+					false);
+	if (!IS_ERR(cs_keyring_ref)) {
+		cs_keyring = key_ref_to_ptr(cs_keyring_ref);
+		key_ref_put(cs_keyring_ref);
+		goto found;
+	}
+	/* Search default locations: thread, process, session keyrings */
+	cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL);
+	if (IS_ERR(cs_keyring))
+		return NULL;
+	key_put(cs_keyring);
+found:
+	return cs_keyring;
+}
+
+static void cleanup_cs_keys(void)
+{
+	struct key *cs_keyring;
+
+	cs_keyring = find_cs_keyring();
+	if (!cs_keyring)
+		return;
+
+	pr_dbf_msg("Found cert_store keyring. Purging...");
+	/*
+	 * Remove cert_store_key_type in case invalidation
+	 * of old cert_store keys failed (= severe error).
+	 */
+	if (invalidate_keyring_keys(cs_keyring))
+		unregister_key_type(&key_type_cert_store_key);
+
+	keyring_clear(cs_keyring);
+	key_invalidate(cs_keyring);
+	key_put(cs_keyring);
+	key_unlink(get_user_session_keyring(), cs_keyring);
+}
+
+static struct key *create_cs_keyring(void)
+{
+	static struct key *cs_keyring;
+
+	/* Cleanup previous cs_keyring and all associated keys if any. */
+	cleanup_cs_keys();
+	cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID,
+				   GLOBAL_ROOT_GID, current_cred(),
+				   (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+				   KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP,
+				   NULL, get_user_session_keyring());
+	if (IS_ERR(cs_keyring)) {
+		pr_dbf_msg("Can't allocate cert_store keyring");
+		return NULL;
+	}
+
+	pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial);
+
+	/*
+	 * In case a previous clean-up ran into an
+	 * error and unregistered key type.
+	 */
+	register_key_type(&key_type_cert_store_key);
+
+	return cs_keyring;
+}
+
+/*
+ * Allocate memory and create key description in format
+ * [key name in EBCDIC]:[VCE index]:[CS token].
+ * Return a pointer to key description or NULL if memory
+ * allocation failed. Memory should be freed by caller.
+ */
+static char *get_key_description(struct vcssb *vcssb, const struct vce *vce)
+{
+	size_t len, name_len;
+	u32 cs_token;
+	char *desc;
+
+	cs_token = vcssb->cs_token;
+	/* Description string contains "%64s:%05u:%010u\0". */
+	name_len = sizeof(vce->vce_hdr.vc_name);
+	len = name_len + 1 + 5 + 1 + 10 + 1;
+	desc = kmalloc(len, GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	memcpy(desc, vce->vce_hdr.vc_name, name_len);
+	snprintf(desc + name_len, len - name_len, ":%05u:%010u",
+		 vce->vce_hdr.vc_index, cs_token);
+
+	return desc;
+}
+
+/*
+ * Create a key of type "cert_store_key" using the data from VCE for key
+ * payload and key description. Link the key to "cert_store" keyring.
+ */
+static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce,
+			       struct key *keyring)
+{
+	key_ref_t newkey;
+	char *desc;
+	int rc;
+
+	desc = get_key_description(vcssb, vce);
+	if (!desc)
+		return -ENOMEM;
+
+	newkey = key_create_or_update(
+		make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME,
+		desc, (u8 *)vce + vce->vce_hdr.vc_offset,
+		vce->vce_hdr.vc_length,
+		(KEY_POS_ALL & ~KEY_POS_SETATTR)  | KEY_USR_VIEW | KEY_USR_READ,
+		KEY_ALLOC_NOT_IN_QUOTA);
+
+	rc = PTR_ERR_OR_ZERO(newkey);
+	if (rc) {
+		pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc);
+		rc = -ENOKEY;
+		goto out;
+	}
+
+	key_ref_put(newkey);
+out:
+	kfree(desc);
+	return rc;
+}
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode 1. */
+static int get_vcssb(struct vcssb *vcssb)
+{
+	int diag320_rc;
+
+	memset(vcssb, 0, sizeof(*vcssb));
+	vcssb->vcssb_length = VCSSB_LEN_BYTES;
+	diag320_rc = diag320(DIAG320_STORAGE, vcssb);
+	pr_dbf_vcssb(vcssb);
+
+	if (diag320_rc != DIAG320_RC_OK) {
+		pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc);
+		return -EIO;
+	}
+	if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) {
+		pr_dbf_msg("No certificates available for current configuration");
+		return -ENOKEY;
+	}
+
+	return 0;
+}
+
+static u32 get_4k_mult_vcb_size(struct vcssb *vcssb)
+{
+	return round_up(vcssb->max_single_vcb_length, PAGE_SIZE);
+}
+
+/* Fill input fields of single-entry VCB that will be read by LPAR. */
+static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index)
+{
+	memset(vcb, 0, sizeof(*vcb));
+	vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb);
+	vcb->vcb_hdr.cs_token = vcssb->cs_token;
+
+	/* Request single entry. */
+	vcb->vcb_hdr.first_vc_index = index;
+	vcb->vcb_hdr.last_vc_index = index;
+}
+
+static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce)
+{
+	struct vce *extracted_vce;
+
+	extracted_vce = (struct vce *)vcb->vcb_buf;
+	memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length);
+	pr_dbf_vce(vce);
+}
+
+static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb)
+{
+	int rc, diag320_rc;
+
+	fill_vcb_input(vcssb, vcb, index);
+
+	diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb);
+	pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc);
+	pr_dbf_vcb(vcb);
+
+	switch (diag320_rc) {
+	case DIAG320_RC_OK:
+		rc = 0;
+		if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) {
+			pr_dbf_msg("No certificate entry for index %u", index);
+			rc = -ENOKEY;
+		} else if (vcb->vcb_hdr.remaining_vc_count != 0) {
+			/* Retry on insufficient space. */
+			pr_dbf_msg("Couldn't get all requested certificates");
+			rc = -EAGAIN;
+		}
+		break;
+	case DIAG320_RC_CS_NOMATCH:
+		pr_dbf_msg("Certificate Store token mismatch");
+		rc = -EAGAIN;
+		break;
+	default:
+		pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%04x)", diag320_rc);
+		rc = -EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+/*
+ * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call,
+ * extract VCE and create a key from its certificate.
+ */
+static int create_key_from_sevcb(struct vcssb *vcssb, u16 index,
+				 struct key *keyring)
+{
+	struct vcb *vcb;
+	struct vce *vce;
+	int rc;
+
+	rc = -ENOMEM;
+	vcb = vmalloc(get_4k_mult_vcb_size(vcssb));
+	vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr));
+	if (!vcb || !vce)
+		goto out;
+
+	rc = get_sevcb(vcssb, index, vcb);
+	if (rc)
+		goto out;
+
+	extract_vce_from_sevcb(vcb, vce);
+	rc = check_certificate_valid(vce);
+	if (rc)
+		goto out;
+
+	rc = create_key_from_vce(vcssb, vce, keyring);
+	if (rc)
+		goto out;
+
+	pr_dbf_msg("Successfully created key from Certificate Entry %d", index);
+out:
+	vfree(vce);
+	vfree(vcb);
+	return rc;
+}
+
+/*
+ * Request a single-entry VCB for each VCE available for the partition.
+ * Create a key from it and link it to cert_store keyring. If no keys
+ * could be created (i.e. VCEs were invalid) return -ENOKEY.
+ */
+static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring)
+{
+	int rc, index, count, added;
+
+	count = 0;
+	added = 0;
+	/* Certificate Store entries indices start with 1 and have no gaps. */
+	for (index = 1; index < vcssb->total_vc_index_count + 1; index++) {
+		pr_dbf_msg("Creating key from VCE %u", index);
+		rc = create_key_from_sevcb(vcssb, index, keyring);
+		count++;
+
+		if (rc == -EAGAIN)
+			return rc;
+
+		if (rc)
+			pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc);
+		else
+			added++;
+	}
+
+	if (added == 0) {
+		pr_dbf_msg("Processed %d entries. No keys created", count);
+		return -ENOKEY;
+	}
+
+	pr_info("Added %d of %d keys to cert_store keyring\n", added, count);
+
+	/*
+	 * Do not allow to link more keys to certificate store keyring after all
+	 * the VCEs were processed.
+	 */
+	rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL);
+	if (rc)
+		pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc);
+
+	return 0;
+}
+
+/*
+ * Query the installed DIAG320 subcodes (subcode 0).
+ * Return -ENOENT if the query fails or subcode 1 or 2 is not available.
+ */
+static int query_diag320_subcodes(void)
+{
+	unsigned long ism[ISM_LEN_DWORDS];
+	int diag_rc = diag320(0, ism);
+
+	if (diag_rc != DIAG320_RC_OK) {
+		pr_dbf_msg("DIAG320 subcode query returned %04x", diag_rc);
+		return -ENOENT;
+	}
+
+	/* Record the raw query result in the hexdump debug area. */
+	debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0");
+	debug_event(cert_store_hexdump, 3, ism, sizeof(ism));
+
+	/* Bits 1 and 2 must both be set for the required subcodes. */
+	if (test_bit_inv(1, ism) && test_bit_inv(2, ism))
+		return 0;
+
+	pr_dbf_msg("Not all required DIAG320 subcodes are installed");
+	return -ENOENT;
+}
+
+/*
+ * Verify that the firmware supports the Certificate Store and that DIAG320
+ * subcodes 1 and 2 are installed. Then create the cert_store keyring and
+ * link every certificate available for the current partition to it as a
+ * key of type "cert_store_key". On refresh or error the cert_store keyring
+ * is invalidated and all keys of type "cert_store_key" are destroyed.
+ */
+static int fill_cs_keyring(void)
+{
+	struct key *cs_keyring;
+	struct vcssb *vcssb;
+	int rc;
+
+	vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL);
+	if (!vcssb) {
+		rc = -ENOMEM;
+		goto cleanup_keys;
+	}
+
+	if (!sclp.has_diag320) {
+		pr_dbf_msg("Certificate Store is not supported");
+		rc = -ENOENT;
+		goto cleanup_keys;
+	}
+
+	/* Query the installed subcodes first, then fetch the VCSSB. */
+	rc = query_diag320_subcodes();
+	if (!rc)
+		rc = get_vcssb(vcssb);
+	if (rc)
+		goto cleanup_keys;
+
+	/* A NULL keyring is reported to the caller as -ENOMEM. */
+	cs_keyring = create_cs_keyring();
+	if (!cs_keyring) {
+		rc = -ENOMEM;
+		goto cleanup_keys;
+	}
+
+	rc = add_certificates_to_keyring(vcssb, cs_keyring);
+	if (!rc)
+		goto out;
+
+	/* Failure: drop the new keyring, then clean up the cert_store keys. */
+	key_put(cs_keyring);
+cleanup_keys:
+	cleanup_cs_keys();
+out:
+	kfree(vcssb);
+	return rc;
+}
+
+static DEFINE_MUTEX(cs_refresh_lock);
+static int cs_status_val = -1;
+
+static ssize_t cs_status_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	/* -1 is the initial value: no refresh has stored a result yet. */
+	if (cs_status_val == -1)
+		return sysfs_emit(buf, "uninitialized\n");
+	if (cs_status_val)
+		return sysfs_emit(buf, "failed (%d)\n", cs_status_val);
+	return sysfs_emit(buf, "ok\n");
+}
+
+static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status);
+
+static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int rc, attempt;
+
+	pr_dbf_msg("Refresh certificate store information requested");
+	rc = mutex_lock_interruptible(&cs_refresh_lock);
+	if (rc)
+		return rc;
+
+	for (attempt = 0; attempt < DIAG_MAX_RETRIES; attempt++) {
+		rc = fill_cs_keyring();
+		if (!rc)
+			break;
+		pr_dbf_msg("Failed to refresh certificate store information (%d)", rc);
+		if (rc != -EAGAIN)
+			break;
+	}
+	cs_status_val = rc;
+	mutex_unlock(&cs_refresh_lock);
+
+	return rc ? rc : count;
+}
+
+static struct kobj_attribute refresh_attr = __ATTR_WO(refresh);
+
+static const struct attribute *cert_store_attrs[] __initconst = {
+	&cs_status_attr.attr,
+	&refresh_attr.attr,
+	NULL,
+};
+
+static struct kobject *cert_store_kobj;
+
+static int __init cert_store_init(void)
+{
+	int rc = -ENOMEM;
+
+	/* One debug area for sprintf-style messages, one for hex dumps. */
+	cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64);
+	if (!cert_store_dbf)
+		goto cleanup_dbf;
+
+	cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128);
+	if (!cert_store_hexdump)
+		goto cleanup_dbf;
+
+	debug_register_view(cert_store_hexdump, &debug_hex_ascii_view);
+	debug_register_view(cert_store_dbf, &debug_sprintf_view);
+
+	/* Create directory /sys/firmware/cert_store. */
+	cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj);
+	if (!cert_store_kobj)
+		goto cleanup_dbf;
+
+	rc = sysfs_create_files(cert_store_kobj, cert_store_attrs);
+	if (!rc) {
+		/* NOTE(review): register_key_type()'s result is not checked. */
+		register_key_type(&key_type_cert_store_key);
+		return 0;
+	}
+
+	/* Attribute creation failed: undo the kobject, then the debug areas. */
+	kobject_put(cert_store_kobj);
+cleanup_dbf:
+	debug_unregister(cert_store_dbf);
+	debug_unregister(cert_store_hexdump);
+
+	return rc;
+}
+device_initcall(cert_store_init);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index eee1ad3e1b29..5a86b9d1da71 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -24,11 +24,12 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/binfmts.h>
+#include <asm/vdso-symbols.h>
+#include <asm/access-regs.h>
 #include <asm/ucontext.h>
 #include <linux/uaccess.h>
 #include <asm/lowcore.h>
-#include <asm/switch_to.h>
-#include <asm/vdso.h>
+#include <asm/fpu.h>
 #include "compat_linux.h"
 #include "compat_ptrace.h"
 #include "entry.h"
@@ -55,7 +56,7 @@ typedef struct
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fpu_regs();
+	save_user_fpu_regs();
 }
 
 /* Load registers after signal return */
@@ -78,7 +79,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.ufpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
 		return -EFAULT;
 	return 0;
@@ -98,10 +99,6 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
 		return -EINVAL;
 
-	/* Test the floating-point-control word. */
-	if (test_fp_ctl(user_sregs.fpregs.fpc))
-		return -EINVAL;
-
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
 	regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
 		(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
@@ -116,7 +113,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
-	fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+	fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, &current->thread.ufpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -137,13 +134,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+			vxrs[i] = current->thread.ufpu.vxrs[i].low;
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+				   current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -165,15 +162,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+			current->thread.ufpu.vxrs[i].low = vxrs[i];
 	}
 	return 0;
 }
@@ -187,7 +184,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask))
 		goto badframe;
 	set_current_blocked(&set);
-	save_fpu_regs();
+	save_user_fpu_regs();
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -210,7 +207,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
-	save_fpu_regs();
+	save_user_fpu_regs();
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -265,7 +262,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 	 * the machine supports it
 	 */
 	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
-	if (!MACHINE_HAS_VX)
+	if (!cpu_has_vx())
 		frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
 			      sizeof(frame->sregs_ext.vxrs_high);
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
@@ -348,11 +345,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 	 * the machine supports it
 	 */
 	uc_flags = UC_GPRS_HIGH;
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		uc_flags |= UC_VXRS;
-	} else
+	} else {
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
 			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
 		return -EFAULT;
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..4b9b34f95d72
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "cpacf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction)						\
+static ssize_t name##_query_raw_read(struct file *fp,				\
+				     struct kobject *kobj,			\
+				     const struct bin_attribute *attr,		\
+				     char *buf, loff_t offs,			\
+				     size_t count)				\
+{										\
+	cpacf_mask_t mask;							\
+										\
+	if (!cpacf_query(CPACF_##instruction, &mask))				\
+		return -EOPNOTSUPP;						\
+	return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask));	\
+}										\
+static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction)					\
+static ssize_t name##_query_auth_info_raw_read(				\
+	struct file *fp, struct kobject *kobj,				\
+	const struct bin_attribute *attr, char *buf, loff_t offs,	\
+	size_t count)							\
+{									\
+	cpacf_qai_t qai;						\
+									\
+	if (!cpacf_qai(CPACF_##instruction, &qai))			\
+		return -EOPNOTSUPP;					\
+	return memory_read_from_buffer(buf, count, &offs, &qai,		\
+					sizeof(qai));			\
+}									\
+static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static const struct bin_attribute *const cpacf_attrs[] = {
+	&bin_attr_km_query_raw,
+	&bin_attr_kmc_query_raw,
+	&bin_attr_kimd_query_raw,
+	&bin_attr_klmd_query_raw,
+	&bin_attr_kmac_query_raw,
+	&bin_attr_pckmo_query_raw,
+	&bin_attr_kmf_query_raw,
+	&bin_attr_kmctr_query_raw,
+	&bin_attr_kmo_query_raw,
+	&bin_attr_pcc_query_raw,
+	&bin_attr_prno_query_raw,
+	&bin_attr_kma_query_raw,
+	&bin_attr_kdsa_query_raw,
+	&bin_attr_km_query_auth_info_raw,
+	&bin_attr_kmc_query_auth_info_raw,
+	&bin_attr_kimd_query_auth_info_raw,
+	&bin_attr_klmd_query_auth_info_raw,
+	&bin_attr_kmac_query_auth_info_raw,
+	&bin_attr_pckmo_query_auth_info_raw,
+	&bin_attr_kmf_query_auth_info_raw,
+	&bin_attr_kmctr_query_auth_info_raw,
+	&bin_attr_kmo_query_auth_info_raw,
+	&bin_attr_pcc_query_auth_info_raw,
+	&bin_attr_prno_query_auth_info_raw,
+	&bin_attr_kma_query_auth_info_raw,
+	&bin_attr_kdsa_query_auth_info_raw,
+	NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+	.name = "cpacf",
+	.bin_attrs_new = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+	struct device *cpu_root = bus_get_dev_root(&cpu_subsys);
+	int rc;
+
+	/* Without a root device for the CPU subsystem nothing is created. */
+	if (!cpu_root)
+		return 0;
+	rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+	put_device(cpu_root);
+	return rc;
+}
+device_initcall(cpacf_init);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 72e106cfd8c7..2f4174b961de 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -16,10 +16,11 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/mm.h>
+#include <linux/io.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/cpcmd.h>
-#include <asm/io.h>
+#include <asm/asm.h>
 
 static DEFINE_SPINLOCK(cpcmd_lock);
 static char cpcmd_buf[241];
@@ -45,12 +46,11 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
 	ry.odd	= *rlen;
 	asm volatile(
 		"	diag	%[rx],%[ry],0x8\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=&d" (cc), [ry] "+&d" (ry.pair)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [ry] "+d" (ry.pair)
 		: [rx] "d" (rx.pair)
-		: "cc");
-	if (cc)
+		: CC_CLOBBER);
+	if (CC_TRANSFORM(cc))
 		*rlen += ry.odd;
 	else
 		*rlen = ry.odd;
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 000000000000..76210f001028
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <asm/machine.h>
+#include <asm/elf.h>
+
+enum {
+	TYPE_HWCAP,
+	TYPE_FACILITY,
+	TYPE_MACHINE,
+};
+
+struct s390_cpu_feature {
+	unsigned int type	: 4;
+	unsigned int num	: 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+	[S390_CPU_FEATURE_MSA]	= {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+	[S390_CPU_FEATURE_VXRS]	= {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+	[S390_CPU_FEATURE_UV]	= {.type = TYPE_FACILITY, .num = 158},
+	[S390_CPU_FEATURE_D288]	= {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288},
+};
+
+/*
+ * cpu_have_feature - Test CPU feature @num, e.g. on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+	const struct s390_cpu_feature *feat;
+
+	if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+		return 0;
+
+	/* The table entry's type selects which test the number is used for. */
+	feat = &s390_cpu_features[num];
+	if (feat->type == TYPE_HWCAP)
+		return !!(elf_hwcap & BIT(feat->num));
+	if (feat->type == TYPE_FACILITY)
+		return test_facility(feat->num);
+	if (feat->type == TYPE_MACHINE)
+		return test_machine_feature(feat->num);
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+EXPORT_SYMBOL(cpu_have_feature);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..adb164223f8c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -21,6 +21,8 @@
 #include <asm/elf.h>
 #include <asm/ipl.h>
 #include <asm/sclp.h>
+#include <asm/maccess.h>
+#include <asm/fpu.h>
 
 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -45,7 +47,7 @@ struct save_area {
 	u64 fprs[16];
 	u32 fpc;
 	u32 prefix;
-	u64 todpreg;
+	u32 todpreg;
 	u64 timer;
 	u64 todcmp;
 	u64 vxrs_low[16];
@@ -61,9 +63,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
 {
 	struct save_area *sa;
 
-	sa = memblock_alloc(sizeof(*sa), 8);
-	if (!sa)
-		panic("Failed to allocate save area\n");
+	sa = memblock_alloc_or_panic(sizeof(*sa), 8);
 
 	if (is_boot_cpu)
 		list_add(&sa->list, &dump_save_areas);
@@ -109,43 +109,20 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
 
 	/* Copy lower halves of vector registers 0-15 */
 	for (i = 0; i < 16; i++)
-		memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8);
+		sa->vxrs_low[i] = vxrs[i].low;
 	/* Copy vector registers 16-31 */
 	memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
 }
 
-/*
- * Return physical address for virtual address
- */
-static inline void *load_real_addr(void *addr)
-{
-	unsigned long real_addr;
-
-	asm volatile(
-		   "	lra     %0,0(%1)\n"
-		   "	jz	0f\n"
-		   "	la	%0,0\n"
-		   "0:"
-		   : "=a" (real_addr) : "a" (addr) : "cc");
-	return (void *)real_addr;
-}
-
-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
 {
-	unsigned long len;
-	void *ra;
-	int rc;
+	size_t len, copied, res = 0;
 
 	while (count) {
 		if (!oldmem_data.start && src < sclp.hsa_size) {
 			/* Copy from zfcp/nvme dump HSA area */
 			len = min(count, sclp.hsa_size - src);
-			rc = memcpy_hsa_kernel(dst, src, len);
-			if (rc)
-				return rc;
+			copied = memcpy_hsa_iter(iter, src, len);
 		} else {
 			/* Check for swapped kdump oldmem areas */
 			if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
@@ -157,56 +134,27 @@ int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
 			} else {
 				len = count;
 			}
-			if (is_vmalloc_or_module_addr(dst)) {
-				ra = load_real_addr(dst);
-				len = min(PAGE_SIZE - offset_in_page(ra), len);
-			} else {
-				ra = dst;
-			}
-			if (memcpy_real(ra, src, len))
-				return -EFAULT;
+			copied = memcpy_real_iter(iter, src, len);
 		}
-		dst += len;
-		src += len;
-		count -= len;
+		count -= copied;
+		src += copied;
+		res += copied;
+		if (copied < len)
+			break;
 	}
-	return 0;
+	return res;
 }
 
-/*
- * Copy memory of the old, dumped system to a user space virtual address
- */
-static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
 {
-	unsigned long len;
-	int rc;
+	struct iov_iter iter;
+	struct kvec kvec;
 
-	while (count) {
-		if (!oldmem_data.start && src < sclp.hsa_size) {
-			/* Copy from zfcp/nvme dump HSA area */
-			len = min(count, sclp.hsa_size - src);
-			rc = memcpy_hsa_user(dst, src, len);
-			if (rc)
-				return rc;
-		} else {
-			/* Check for swapped kdump oldmem areas */
-			if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
-				src -= oldmem_data.start;
-				len = min(count, oldmem_data.size - src);
-			} else if (oldmem_data.start && src < oldmem_data.size) {
-				len = min(count, oldmem_data.size - src);
-				src += oldmem_data.start;
-			} else {
-				len = count;
-			}
-			rc = copy_to_user_real(dst, src, count);
-			if (rc)
-				return rc;
-		}
-		dst += len;
-		src += len;
-		count -= len;
-	}
+	kvec.iov_base = dst;
+	kvec.iov_len = count;
+	iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
+	if (copy_oldmem_iter(&iter, src, count) < count)
+		return -EFAULT;
 	return 0;
 }
 
@@ -217,26 +165,9 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
 			 unsigned long offset)
 {
 	unsigned long src;
-	int rc;
 
-	if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
-		return -EINVAL;
-	/* Multi-segment iterators are not supported */
-	if (iter->nr_segs > 1)
-		return -EINVAL;
-	if (!csize)
-		return 0;
 	src = pfn_to_phys(pfn) + offset;
-
-	/* XXX: pass the iov_iter down to a common function */
-	if (iter_is_iovec(iter))
-		rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
-	else
-		rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
-	if (rc < 0)
-		return rc;
-	iov_iter_advance(iter, csize);
-	return csize;
+	return copy_oldmem_iter(iter, src, csize);
 }
 
 /*
@@ -304,14 +235,16 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
 						       prot);
 }
 
-static const char *nt_name(Elf64_Word type)
+/*
+ * Return true only when in a kdump or stand-alone kdump environment.
+ * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump"
+ * environments, where this function returns false; see dump_available().
+ */
+bool is_kdump_kernel(void)
 {
-	const char *name = "LINUX";
-
-	if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
-		name = KEXEC_CORE_NOTE_NAME;
-	return name;
+	return oldmem_data.start;
 }
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
 
 /*
  * Initialize ELF note
@@ -337,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
 	return PTR_ADD(buf, len);
 }
 
-static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
-{
-	return nt_init_name(buf, type, desc, d_len, nt_name(type));
-}
+#define nt_init(buf, type, desc) \
+	nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type)
 
 /*
  * Calculate the size of ELF note
@@ -356,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name)
 	return size;
 }
 
-static inline size_t nt_size(Elf64_Word type, int d_len)
-{
-	return nt_size_name(d_len, nt_name(type));
-}
+#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type)
 
 /*
  * Fill ELF notes for one CPU with save area registers
@@ -380,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
 	memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
 	memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
 	/* Create ELF notes for the CPU */
-	ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
-	ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
-	ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
-	ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
-	ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
-	ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
-	ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
-	if (MACHINE_HAS_VX) {
-		ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
-			      &sa->vxrs_high, sizeof(sa->vxrs_high));
-		ptr = nt_init(ptr, NT_S390_VXRS_LOW,
-			      &sa->vxrs_low, sizeof(sa->vxrs_low));
+	ptr = nt_init(ptr, PRSTATUS, nt_prstatus);
+	ptr = nt_init(ptr, PRFPREG, nt_fpregset);
+	ptr = nt_init(ptr, S390_TIMER, sa->timer);
+	ptr = nt_init(ptr, S390_TODCMP, sa->todcmp);
+	ptr = nt_init(ptr, S390_TODPREG, sa->todpreg);
+	ptr = nt_init(ptr, S390_CTRS, sa->ctrs);
+	ptr = nt_init(ptr, S390_PREFIX, sa->prefix);
+	if (cpu_has_vx()) {
+		ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high);
+		ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low);
 	}
 	return ptr;
 }
@@ -404,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void)
 	struct save_area *sa = NULL;
 	size_t size;
 
-	size =	nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
-	size +=  nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
-	size +=  nt_size(NT_S390_TIMER, sizeof(sa->timer));
-	size +=  nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
-	size +=  nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
-	size +=  nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
-	size +=  nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
-	if (MACHINE_HAS_VX) {
-		size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
-		size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+	size =	nt_size(PRSTATUS, struct elf_prstatus);
+	size += nt_size(PRFPREG, elf_fpregset_t);
+	size += nt_size(S390_TIMER, sa->timer);
+	size += nt_size(S390_TODCMP, sa->todcmp);
+	size += nt_size(S390_TODPREG, sa->todpreg);
+	size += nt_size(S390_CTRS, sa->ctrs);
+	size += nt_size(S390_PREFIX, sa->prefix);
+	if (cpu_has_vx()) {
+		size += nt_size(S390_VXRS_HIGH, sa->vxrs_high);
+		size += nt_size(S390_VXRS_LOW, sa->vxrs_low);
 	}
 
 	return size;
@@ -428,8 +354,8 @@ static void *nt_prpsinfo(void *ptr)
 
 	memset(&prpsinfo, 0, sizeof(prpsinfo));
 	prpsinfo.pr_sname = 'R';
-	strcpy(prpsinfo.pr_fname, "vmlinux");
-	return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
+	strscpy(prpsinfo.pr_fname, "vmlinux");
+	return nt_init(ptr, PRPSINFO, prpsinfo);
 }
 
 /*
@@ -518,7 +444,7 @@ static void *nt_final(void *ptr)
 /*
  * Initialize ELF header (new kernel)
  */
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
 {
 	memset(ehdr, 0, sizeof(*ehdr));
 	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -532,7 +458,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
 	ehdr->e_phoff = sizeof(Elf64_Ehdr);
 	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
 	ehdr->e_phentsize = sizeof(Elf64_Phdr);
-	ehdr->e_phnum = mem_chunk_cnt + 1;
+	/* Number of PT_LOAD program headers plus PT_NOTE program header */
+	ehdr->e_phnum = phdr_count + 1;
 	return ehdr + 1;
 }
 
@@ -563,27 +490,77 @@ static int get_mem_chunk_cnt(void)
 	return cnt;
 }
 
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+		unsigned long vaddr, unsigned long size)
+{
+	phdr->p_type = PT_LOAD;
+	phdr->p_vaddr = vaddr;
+	phdr->p_offset = paddr;
+	phdr->p_paddr = paddr;
+	phdr->p_filesz = size;
+	phdr->p_memsz = size;
+	phdr->p_flags = PF_R | PF_W | PF_X;
+	phdr->p_align = PAGE_SIZE;
+}
+
 /*
  * Initialize ELF loads (new kernel)
  */
-static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
 {
+	unsigned long old_identity_base = 0;
 	phys_addr_t start, end;
 	u64 idx;
 
+	if (os_info_has_vm)
+		old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
 	for_each_physmem_range(idx, &oldmem_type, &start, &end) {
-		phdr->p_filesz = end - start;
-		phdr->p_type = PT_LOAD;
-		phdr->p_offset = start;
-		phdr->p_vaddr = start;
-		phdr->p_paddr = start;
-		phdr->p_memsz = end - start;
-		phdr->p_flags = PF_R | PF_W | PF_X;
-		phdr->p_align = PAGE_SIZE;
+		fill_ptload(phdr, start, old_identity_base + start,
+			    end - start);
 		phdr++;
 	}
 }
 
+static bool os_info_has_vm(void)
+{
+	return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+		unsigned long long paddr, unsigned long long size)
+{
+	unsigned long old_identity_base = 0;
+
+	if (os_info_has_vm())
+		old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+	fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
+/*
+ * Prepare PT_LOAD type program header for kernel image region
+ */
+static void text_init(Elf64_Phdr *phdr)
+{
+	unsigned long start_phys = os_info_old_value(OS_INFO_IMAGE_PHYS);
+	unsigned long start = os_info_old_value(OS_INFO_IMAGE_START);
+	unsigned long end = os_info_old_value(OS_INFO_IMAGE_END);
+
+	/*
+	 * Same PT_LOAD layout as the memory ranges, so reuse fill_ptload():
+	 * file offset and physical address are the image's physical start,
+	 * file and memory size are the image length. Only the virtual
+	 * address differs: it is the old kernel's image start instead of
+	 * the identity base plus the physical address.
+	 */
+	fill_ptload(phdr, start_phys, start, end - start);
+}
+
 /*
  * Initialize notes (new kernel)
  */
@@ -609,7 +586,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
 	return ptr;
 }
 
-static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+static size_t get_elfcorehdr_size(int phdr_count)
 {
 	size_t size;
 
@@ -617,7 +594,7 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
 	/* PT_NOTES */
 	size += sizeof(Elf64_Phdr);
 	/* nt_prpsinfo */
-	size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+	size += nt_size(PRPSINFO, struct elf_prpsinfo);
 	/* regsets */
 	size += get_cpu_cnt() * get_cpu_elf_notes_size();
 	/* nt_vmcoreinfo */
@@ -625,7 +602,7 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
 	/* nt_final */
 	size += sizeof(Elf64_Nhdr);
 	/* PT_LOADS */
-	size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+	size += phdr_count * sizeof(Elf64_Phdr);
 
 	return size;
 }
@@ -635,10 +612,10 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
  */
 int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 {
-	Elf64_Phdr *phdr_notes, *phdr_loads;
-	int mem_chunk_cnt;
+	Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+	int mem_chunk_cnt, phdr_text_cnt;
+	size_t alloc_size;
 	void *ptr, *hdr;
-	u32 alloc_size;
 	u64 hdr_off;
 
 	/* If we are not in kdump or zfcp/nvme dump mode return */
@@ -656,12 +633,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 	}
 
 	mem_chunk_cnt = get_mem_chunk_cnt();
+	phdr_text_cnt = os_info_has_vm() ? 1 : 0;
 
-	alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+	alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
 
 	hdr = kzalloc(alloc_size, GFP_KERNEL);
 
-	/* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+	/*
+	 * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
 	 * a dump with this crash kernel will fail. Panic now to allow other
 	 * dump mechanisms to take over.
 	 */
@@ -669,18 +648,25 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 		panic("s390 kdump allocating elfcorehdr failed");
 
 	/* Init elf header */
-	ptr = ehdr_init(hdr, mem_chunk_cnt);
+	phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
 	/* Init program headers */
-	phdr_notes = ptr;
-	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
-	phdr_loads = ptr;
-	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+	if (phdr_text_cnt) {
+		phdr_text = phdr_notes + 1;
+		phdr_loads = phdr_text + 1;
+	} else {
+		phdr_loads = phdr_notes + 1;
+	}
+	ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
 	/* Init notes */
 	hdr_off = PTR_DIFF(ptr, hdr);
 	ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+	/* Init kernel text program header */
+	if (phdr_text_cnt)
+		text_init(phdr_text);
 	/* Init loads */
+	loads_init(phdr_loads, phdr_text_cnt);
+	/* Finalize program headers */
 	hdr_off = PTR_DIFF(ptr, hdr);
-	loads_init(phdr_loads, hdr_off);
 	*addr = (unsigned long long) hdr;
 	*size = (unsigned long long) hdr_off;
 	BUG_ON(elfcorehdr_size > alloc_size);
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
new file mode 100644
index 000000000000..8cc26cf2c64a
--- /dev/null
+++ b/arch/s390/kernel/ctlreg.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *	Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/irqflags.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+#include <asm/abs_lowcore.h>
+#include <asm/ctlreg.h>
+
+/*
+ * system_ctl_lock guards access to global control register contents which
+ * are kept in the control register save area within absolute lowcore
+ * at physical address zero.
+ */
+static DEFINE_SPINLOCK(system_ctl_lock);
+
+void system_ctlreg_lock(void)
+	__acquires(&system_ctl_lock)
+{
+	spin_lock(&system_ctl_lock);
+}
+
+void system_ctlreg_unlock(void)
+	__releases(&system_ctl_lock)
+{
+	spin_unlock(&system_ctl_lock);
+}
+
+static bool system_ctlreg_area_init __ro_after_init;
+
+void __init system_ctlreg_init_save_area(struct lowcore *lc)
+{
+	struct lowcore *abs_lc;
+
+	abs_lc = get_abs_lowcore();
+	__local_ctl_store(0, 15, lc->cregs_save_area);
+	__local_ctl_store(0, 15, abs_lc->cregs_save_area);
+	put_abs_lowcore(abs_lc);
+	system_ctlreg_area_init = true;
+}
+
+struct ctlreg_parms {
+	unsigned long andval;
+	unsigned long orval;
+	unsigned long val;
+	int request;
+	int cr;
+};
+
+static void ctlreg_callback(void *info)
+{
+	struct ctlreg_parms *pp = info;
+	struct ctlreg regs[16];
+
+	__local_ctl_store(0, 15, regs);
+	if (pp->request == CTLREG_LOAD) {
+		regs[pp->cr].val = pp->val;
+	} else {
+		regs[pp->cr].val &= pp->andval;
+		regs[pp->cr].val |= pp->orval;
+	}
+	__local_ctl_load(0, 15, regs);
+}
+
+static void system_ctlreg_update(void *info)
+{
+	unsigned long flags;
+
+	if (system_state == SYSTEM_BOOTING) {
+		/*
+		 * For very early calls do not call on_each_cpu()
+		 * since not everything might be setup.
+		 */
+		local_irq_save(flags);
+		ctlreg_callback(info);
+		local_irq_restore(flags);
+	} else {
+		on_each_cpu(ctlreg_callback, info, 1);
+	}
+}
+
+void system_ctlreg_modify(unsigned int cr, unsigned long data, int request)
+{
+	struct ctlreg_parms pp = { .cr = cr, .request = request, };
+	struct lowcore *abs_lc;
+
+	switch (request) {
+	case CTLREG_SET_BIT:
+		pp.orval  = 1UL << data;
+		pp.andval = -1UL;
+		break;
+	case CTLREG_CLEAR_BIT:
+		pp.orval  = 0;
+		pp.andval = ~(1UL << data);
+		break;
+	case CTLREG_LOAD:
+		pp.val = data;
+		break;
+	}
+	if (system_ctlreg_area_init) {
+		system_ctlreg_lock();
+		abs_lc = get_abs_lowcore();
+		if (request == CTLREG_LOAD) {
+			abs_lc->cregs_save_area[cr].val = pp.val;
+		} else {
+			abs_lc->cregs_save_area[cr].val &= pp.andval;
+			abs_lc->cregs_save_area[cr].val |= pp.orval;
+		}
+		put_abs_lowcore(abs_lc);
+		system_ctlreg_update(&pp);
+		system_ctlreg_unlock();
+	} else {
+		system_ctlreg_update(&pp);
+	}
+}
+EXPORT_SYMBOL(system_ctlreg_modify);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4331c7e6e1c0..2a41be2f7925 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/math.h>
 #include <linux/minmax.h>
 #include <linux/debugfs.h>
 
@@ -38,13 +39,13 @@
 
 typedef struct file_private_info {
 	loff_t offset;			/* offset of last read in file */
-	int    act_area;		/* number of last formated area */
+	int    act_area;		/* number of last formatted area */
 	int    act_page;		/* act page in given area */
-	int    act_entry;		/* last formated entry (offset */
+	int    act_entry;		/* last formatted entry (offset */
 					/* relative to beginning of last */
-					/* formated page) */
+					/* formatted page) */
 	size_t act_entry_offset;	/* up to this offset we copied */
-					/* in last read the last formated */
+					/* in last read the last formatted */
 					/* entry to userland */
 	char   temp_buf[2048];		/* buffer for output */
 	debug_info_t *debug_info_org;	/* original debug information */
@@ -60,10 +61,10 @@ typedef struct {
 	 * except of floats, and long long (32 bit)
 	 *
 	 */
-	long args[0];
+	long args[];
 } debug_sprintf_entry_t;
 
-/* internal function prototyes */
+/* internal function prototypes */
 
 static int debug_init(void);
 static ssize_t debug_output(struct file *file, char __user *user_buf,
@@ -77,12 +78,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area,
 static void debug_info_get(debug_info_t *);
 static void debug_info_put(debug_info_t *);
 static int debug_prolog_level_fn(debug_info_t *id,
-				 struct debug_view *view, char *out_buf);
+				 struct debug_view *view, char *out_buf,
+				 size_t out_buf_size);
 static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
 				struct file *file, const char __user *user_buf,
 				size_t user_buf_size, loff_t *offset);
 static int debug_prolog_pages_fn(debug_info_t *id,
-				 struct debug_view *view, char *out_buf);
+				 struct debug_view *view, char *out_buf,
+				 size_t out_buf_size);
 static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
 				struct file *file, const char __user *user_buf,
 				size_t user_buf_size, loff_t *offset);
@@ -90,9 +93,8 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
 				struct file *file, const char __user *user_buf,
 				size_t user_buf_size, loff_t *offset);
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
-				     char *out_buf, const char *in_buf);
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
-				   char *out_buf, debug_sprintf_entry_t *curr_event);
+				     char *out_buf, size_t out_buf_size,
+				     const char *in_buf);
 static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
 static void debug_events_append(debug_info_t *dest, debug_info_t *src);
 
@@ -139,7 +141,7 @@ struct debug_view debug_sprintf_view = {
 	"sprintf",
 	NULL,
 	&debug_dflt_header_fn,
-	(debug_format_proc_t *)&debug_sprintf_format_fn,
+	&debug_sprintf_format_fn,
 	NULL,
 	NULL
 };
@@ -163,7 +165,6 @@ static const struct file_operations debug_file_ops = {
 	.write	 = debug_input,
 	.open	 = debug_open,
 	.release = debug_close,
-	.llseek  = no_llseek,
 };
 
 static struct dentry *debug_debugfs_root_entry;
@@ -250,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
 	rc->level	   = level;
 	rc->buf_size	   = buf_size;
 	rc->entry_size	   = sizeof(debug_entry_t) + buf_size;
-	strlcpy(rc->name, name, sizeof(rc->name));
+	strscpy(rc->name, name);
 	memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
 	memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
 	refcount_set(&(rc->ref_count), 0);
@@ -351,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
 	for (i = 0; i < in->nr_areas; i++) {
 		for (j = 0; j < in->pages_per_area; j++)
 			memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+		rc->active_pages[i] = in->active_pages[i];
+		rc->active_entries[i] = in->active_entries[i];
 	}
+	rc->active_area = in->active_area;
 out:
 	spin_unlock_irqrestore(&in->lock, flags);
 	return rc;
@@ -381,7 +385,7 @@ static void debug_info_put(debug_info_t *db_info)
 
 /*
  * debug_format_entry:
- * - format one debug entry and return size of formated data
+ * - format one debug entry and return size of formatted data
  */
 static int debug_format_entry(file_private_info_t *p_info)
 {
@@ -392,8 +396,10 @@ static int debug_format_entry(file_private_info_t *p_info)
 
 	if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
 		/* print prolog */
-		if (view->prolog_proc)
-			len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+		if (view->prolog_proc) {
+			len += view->prolog_proc(id_snap, view, p_info->temp_buf,
+						 sizeof(p_info->temp_buf));
+		}
 		goto out;
 	}
 	if (!id_snap->areas) /* this is true, if we have a prolog only view */
@@ -403,21 +409,31 @@ static int debug_format_entry(file_private_info_t *p_info)
 
 	if (act_entry->clock == 0LL)
 		goto out; /* empty entry */
-	if (view->header_proc)
+	if (view->header_proc) {
 		len += view->header_proc(id_snap, view, p_info->act_area,
-					 act_entry, p_info->temp_buf + len);
-	if (view->format_proc)
+					 act_entry, p_info->temp_buf + len,
+					 sizeof(p_info->temp_buf) - len);
+	}
+	if (view->format_proc) {
 		len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+					 sizeof(p_info->temp_buf) - len,
 					 DEBUG_DATA(act_entry));
+	}
 out:
 	return len;
 }
 
-/*
- * debug_next_entry:
- * - goto next entry in p_info
+/**
+ * debug_next_entry - Go to the next entry
+ * @p_info:	Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the next entry. If no further entry
+ * exists the current position is set to one after the end and the return value
+ * indicates that no further entries exist.
+ *
+ * Return: True if there are more following entries, false otherwise
  */
-static inline int debug_next_entry(file_private_info_t *p_info)
+static inline bool debug_next_entry(file_private_info_t *p_info)
 {
 	debug_info_t *id;
 
@@ -425,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info)
 	if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
 		p_info->act_entry = 0;
 		p_info->act_page  = 0;
-		goto out;
+		return true;
 	}
 	if (!id->areas)
-		return 1;
+		return false;
 	p_info->act_entry += id->entry_size;
 	/* switch to next page, if we reached the end of the page  */
 	if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
@@ -441,16 +457,93 @@ static inline int debug_next_entry(file_private_info_t *p_info)
 			p_info->act_page = 0;
 		}
 		if (p_info->act_area >= id->nr_areas)
-			return 1;
+			return false;
 	}
-out:
-	return 0;
+	return true;
+}
+
+/**
+ * debug_to_act_entry - Go to the currently active entry
+ * @p_info:	Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the currently active
+ * entry of @p_info->debug_info_snap
+ */
+static void debug_to_act_entry(file_private_info_t *p_info)
+{
+	debug_info_t *snap_id;
+
+	snap_id = p_info->debug_info_snap;
+	p_info->act_area = snap_id->active_area;
+	p_info->act_page = snap_id->active_pages[snap_id->active_area];
+	p_info->act_entry = snap_id->active_entries[snap_id->active_area];
+}
+
+/**
+ * debug_prev_entry - Go to the previous entry
+ * @p_info:	Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the previous entry. If no previous entry
+ * exists the current position is left as DEBUG_PROLOG_ENTRY and the return value
+ * indicates that no previous entries exist.
+ *
+ * Return: True if there are more previous entries, false otherwise
+ */
+
+static inline bool debug_prev_entry(file_private_info_t *p_info)
+{
+	debug_info_t *id;
+
+	id = p_info->debug_info_snap;
+	if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
+		debug_to_act_entry(p_info);
+	if (!id->areas)
+		return false;
+	p_info->act_entry -= id->entry_size;
+	/* switch to prev page, if we reached the beginning of the page  */
+	if (p_info->act_entry < 0) {
+		/* end of previous page */
+		p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
+		p_info->act_page--;
+		if (p_info->act_page < 0) {
+			/* previous area */
+			p_info->act_area--;
+			p_info->act_page = id->pages_per_area - 1;
+		}
+		if (p_info->act_area < 0)
+			p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
+	}
+	/* check full circle */
+	if (id->active_area == p_info->act_area &&
+	    id->active_pages[id->active_area] == p_info->act_page &&
+	    id->active_entries[id->active_area] == p_info->act_entry)
+		return false;
+	return true;
+}
+
+/**
+ * debug_move_entry - Go to next entry in either the forward or backward direction
+ * @p_info:	Private info that is manipulated
+ * @reverse:	If true go to the next entry in reverse i.e. previous
+ *
+ * Sets the current position in @p_info to the next (@reverse == false) or
+ * previous (@reverse == true) entry.
+ *
+ * Return: True if there are further entries in that direction,
+ * false otherwise.
+ */
+static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
+{
+	if (reverse)
+		return debug_prev_entry(p_info);
+	else
+		return debug_next_entry(p_info);
 }
 
 /*
  * debug_output:
  * - called for user read()
- * - copies formated debug entries to the user buffer
+ * - copies formatted debug entries to the user buffer
  */
 static ssize_t debug_output(struct file *file,		/* file descriptor */
 			    char __user *user_buf,	/* user buffer */
@@ -486,7 +579,7 @@ static ssize_t debug_output(struct file *file,		/* file descriptor */
 		}
 		if (copy_size == formatted_line_residue) {
 			entry_offset = 0;
-			if (debug_next_entry(p_info))
+			if (!debug_next_entry(p_info))
 				goto out;
 		}
 	}
@@ -521,15 +614,51 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
 	return rc; /* number of input characters */
 }
 
+static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
+						     struct debug_view *view)
+{
+	debug_info_t *debug_info_snapshot;
+	file_private_info_t *p_info;
+
+	/*
+	 * Make snapshot of current debug areas to get it consistent.
+	 * Copying all the areas is only needed if we have a view which
+	 * formats the debug areas.
+	 */
+	if (!view->format_proc && !view->header_proc)
+		debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+	else
+		debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+	if (!debug_info_snapshot)
+		return NULL;
+	p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+	if (!p_info) {
+		debug_info_free(debug_info_snapshot);
+		return NULL;
+	}
+	p_info->offset = 0;
+	p_info->debug_info_snap = debug_info_snapshot;
+	p_info->debug_info_org	= debug_info;
+	p_info->view = view;
+	p_info->act_area = 0;
+	p_info->act_page = 0;
+	p_info->act_entry = DEBUG_PROLOG_ENTRY;
+	p_info->act_entry_offset = 0;
+	debug_info_get(debug_info);
+
+	return p_info;
+}
+
 /*
  * debug_open:
  * - called for user open()
- * - copies formated output to private_data area of the file
+ * - copies formatted output to private_data area of the file
  *   handle
  */
 static int debug_open(struct inode *inode, struct file *file)
 {
-	debug_info_t *debug_info, *debug_info_snapshot;
+	debug_info_t *debug_info;
 	file_private_info_t *p_info;
 	int i, rc = 0;
 
@@ -547,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file)
 	goto out;
 
 found:
-
-	/* Make snapshot of current debug areas to get it consistent.	  */
-	/* To copy all the areas is only needed, if we have a view which  */
-	/* formats the debug areas. */
-
-	if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
-		debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
-	else
-		debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
-
-	if (!debug_info_snapshot) {
-		rc = -ENOMEM;
-		goto out;
-	}
-	p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+	p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
 	if (!p_info) {
-		debug_info_free(debug_info_snapshot);
 		rc = -ENOMEM;
 		goto out;
 	}
-	p_info->offset = 0;
-	p_info->debug_info_snap = debug_info_snapshot;
-	p_info->debug_info_org	= debug_info;
-	p_info->view = debug_info->views[i];
-	p_info->act_area = 0;
-	p_info->act_page = 0;
-	p_info->act_entry = DEBUG_PROLOG_ENTRY;
-	p_info->act_entry_offset = 0;
 	file->private_data = p_info;
-	debug_info_get(debug_info);
 	nonseekable_open(inode, file);
 out:
 	mutex_unlock(&debug_mutex);
 	return rc;
 }
 
+static void debug_file_private_free(file_private_info_t *p_info)
+{
+	if (p_info->debug_info_snap)
+		debug_info_free(p_info->debug_info_snap);
+	debug_info_put(p_info->debug_info_org);
+	kfree(p_info);
+}
+
 /*
  * debug_close:
  * - called for user close()
@@ -593,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file)
 	file_private_info_t *p_info;
 
 	p_info = (file_private_info_t *) file->private_data;
-	if (p_info->debug_info_snap)
-		debug_info_free(p_info->debug_info_snap);
-	debug_info_put(p_info->debug_info_org);
-	kfree(file->private_data);
+	debug_file_private_free(p_info);
+	file->private_data = NULL;
 	return 0; /* success */
 }
 
+/**
+ * debug_dump - Get a textual representation of debug info, or as much as fits
+ * @id:		Debug information to use
+ * @view:	View with which to dump the debug information
+ * @buf:	Buffer the textual debug data representation is written to
+ * @buf_size:	Size of the buffer, including the trailing '\0' byte
+ * @reverse:	Go backwards from the last written entry
+ *
+ * This function may be used whenever a textual representation of the debug
+ * information is required without using an s390dbf file.
+ *
+ * Note: It is the caller's responsibility to supply a view that is compatible
+ * with the debug information data.
+ *
+ * Return: On success returns the number of bytes written to the buffer not
+ * including the trailing '\0' byte. If @buf_size == 0 the function returns 0.
+ * On failure an error code less than 0 is returned.
+ */
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+		   char *buf, size_t buf_size, bool reverse)
+{
+	file_private_info_t *p_info;
+	size_t size, offset = 0;
+
+	/* Need space for '\0' byte */
+	if (buf_size < 1)
+		return 0;
+	buf_size--;
+
+	p_info = debug_file_private_alloc(id, view);
+	if (!p_info)
+		return -ENOMEM;
+
+	/* There is always at least the DEBUG_PROLOG_ENTRY */
+	do {
+		size = debug_format_entry(p_info);
+		size = min(size, buf_size - offset);
+		memcpy(buf + offset, p_info->temp_buf, size);
+		offset += size;
+		if (offset >= buf_size)
+			break;
+	} while (debug_move_entry(p_info, reverse));
+	debug_file_private_free(p_info);
+	buf[offset] = '\0';
+
+	return offset;
+}
+
 /* Create debugfs entries and add to internal list. */
 static void _debug_register(debug_info_t *id)
 {
@@ -954,7 +1113,7 @@ static int debug_active = 1;
  * always allow read, allow write only if debug_stoppable is set or
  * if debug_active is already off
  */
-static int s390dbf_procactive(struct ctl_table *table, int write,
+static int s390dbf_procactive(const struct ctl_table *table, int write,
 			      void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!write || debug_stoppable || !debug_active)
@@ -963,7 +1122,7 @@ static int s390dbf_procactive(struct ctl_table *table, int write,
 		return 0;
 }
 
-static struct ctl_table s390dbf_table[] = {
+static const struct ctl_table s390dbf_table[] = {
 	{
 		.procname	= "debug_stoppable",
 		.data		= &debug_stoppable,
@@ -978,17 +1137,6 @@ static struct ctl_table s390dbf_table[] = {
 		.mode		= S_IRUGO | S_IWUSR,
 		.proc_handler	= s390dbf_procactive,
 	},
-	{ }
-};
-
-static struct ctl_table s390dbf_dir_table[] = {
-	{
-		.procname	= "s390dbf",
-		.maxlen		= 0,
-		.mode		= S_IRUGO | S_IXUGO,
-		.child		= s390dbf_table,
-	},
-	{ }
 };
 
 static struct ctl_table_header *s390dbf_sysctl_header;
@@ -1304,9 +1452,9 @@ static inline int debug_get_uint(char *buf)
  */
 
 static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
-				 char *out_buf)
+				 char *out_buf, size_t out_buf_size)
 {
-	return sprintf(out_buf, "%i\n", id->pages_per_area);
+	return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area);
 }
 
 /*
@@ -1353,14 +1501,14 @@ out:
  * prints out actual debug level
  */
 static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
-				 char *out_buf)
+				 char *out_buf, size_t out_buf_size)
 {
 	int rc = 0;
 
 	if (id->level == DEBUG_OFF_LEVEL)
-		rc = sprintf(out_buf, "-\n");
+		rc = scnprintf(out_buf, out_buf_size, "-\n");
 	else
-		rc = sprintf(out_buf, "%i\n", id->level);
+		rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level);
 	return rc;
 }
 
@@ -1477,22 +1625,24 @@ out:
  * prints debug data in hex/ascii format
  */
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
-				     char *out_buf, const char *in_buf)
+				     char *out_buf, size_t out_buf_size, const char *in_buf)
 {
 	int i, rc = 0;
 
-	for (i = 0; i < id->buf_size; i++)
-		rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
-	rc += sprintf(out_buf + rc, "| ");
+	for (i = 0; i < id->buf_size; i++) {
+		rc += scnprintf(out_buf + rc, out_buf_size - rc,
+				"%02x ", ((unsigned char *)in_buf)[i]);
+	}
+	rc += scnprintf(out_buf + rc, out_buf_size - rc, "| ");
 	for (i = 0; i < id->buf_size; i++) {
 		unsigned char c = in_buf[i];
 
 		if (isascii(c) && isprint(c))
-			rc += sprintf(out_buf + rc, "%c", c);
+			rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c);
 		else
-			rc += sprintf(out_buf + rc, ".");
+			rc += scnprintf(out_buf + rc, out_buf_size - rc, ".");
 	}
-	rc += sprintf(out_buf + rc, "\n");
+	rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n");
 	return rc;
 }
 
@@ -1500,7 +1650,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
  * prints header for debug entry
  */
 int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
-			 int area, debug_entry_t *entry, char *out_buf)
+			 int area, debug_entry_t *entry, char *out_buf,
+			 size_t out_buf_size)
 {
 	unsigned long sec, usec;
 	unsigned long caller;
@@ -1517,23 +1668,24 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
 	else
 		except_str = "-";
 	caller = (unsigned long) entry->caller;
-	rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px  ",
-		      area, sec, usec, level, except_str,
-		      entry->cpu, (void *)caller);
+	rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px  ",
+			area, sec, usec, level, except_str,
+			entry->cpu, (void *)caller);
 	return rc;
 }
 EXPORT_SYMBOL(debug_dflt_header_fn);
 
 /*
- * prints debug data sprintf-formated:
+ * prints debug data sprintf-formatted:
  * debug_sprinf_event/exception calls must be used together with this view
  */
 
 #define DEBUG_SPRINTF_MAX_ARGS 10
 
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
-				   char *out_buf, debug_sprintf_entry_t *curr_event)
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+			    char *out_buf, size_t out_buf_size, const char *inbuf)
 {
+	debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
 	int num_longs, num_used_args = 0, i, rc = 0;
 	int index[DEBUG_SPRINTF_MAX_ARGS];
 
@@ -1544,8 +1696,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
 		goto out; /* bufsize of entry too small */
 	if (num_longs == 1) {
 		/* no args, we use only the string */
-		strcpy(out_buf, curr_event->string);
-		rc = strlen(curr_event->string);
+		rc = strscpy(out_buf, curr_event->string, out_buf_size);
+		if (rc == -E2BIG)
+			rc = out_buf_size;
 		goto out;
 	}
 
@@ -1557,15 +1710,17 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
 	for (i = 0; i < num_used_args; i++)
 		index[i] = i;
 
-	rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
-		     curr_event->args[index[1]], curr_event->args[index[2]],
-		     curr_event->args[index[3]], curr_event->args[index[4]],
-		     curr_event->args[index[5]], curr_event->args[index[6]],
-		     curr_event->args[index[7]], curr_event->args[index[8]],
-		     curr_event->args[index[9]]);
+	rc = scnprintf(out_buf, out_buf_size,
+		       curr_event->string, curr_event->args[index[0]],
+		       curr_event->args[index[1]], curr_event->args[index[2]],
+		       curr_event->args[index[3]], curr_event->args[index[4]],
+		       curr_event->args[index[5]], curr_event->args[index[6]],
+		       curr_event->args[index[7]], curr_event->args[index[8]],
+		       curr_event->args[index[9]]);
 out:
 	return rc;
 }
+EXPORT_SYMBOL(debug_sprintf_format_fn);
 
 /*
  * debug_init:
@@ -1573,7 +1728,7 @@ out:
  */
 static int __init debug_init(void)
 {
-	s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
+	s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table);
 	mutex_lock(&debug_mutex);
 	debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
 	initialized = 1;
diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile
new file mode 100644
index 000000000000..956aee6c4090
--- /dev/null
+++ b/arch/s390/kernel/diag/Makefile
@@ -0,0 +1 @@
+obj-y	:= diag_misc.o diag324.o diag.o diag310.o
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c
index a778714e4d8b..56b862ba9be8 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag/diag.c
@@ -11,11 +11,13 @@
 #include <linux/cpu.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-extable.h>
 #include <asm/diag.h>
 #include <asm/trace/diag.h>
 #include <asm/sections.h>
-#include "entry.h"
+#include <asm/asm.h>
+#include "../entry.h"
 
 struct diag_stat {
 	unsigned int counter[NR_DIAG_STAT];
@@ -35,6 +37,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
 	[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
 	[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
 	[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+	[DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" },
 	[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
 	[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
 	[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
@@ -48,7 +51,11 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
 	[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
 	[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
 	[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+	[DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" },
 	[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
+	[DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+	[DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
+	[DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
 	[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
 };
 
@@ -57,12 +64,16 @@ struct diag_ops __amode31_ref diag_amode31_ops = {
 	.diag26c = _diag26c_amode31,
 	.diag14 = _diag14_amode31,
 	.diag0c = _diag0c_amode31,
+	.diag8c = _diag8c_amode31,
 	.diag308_reset = _diag308_reset_amode31
 };
 
 static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
 struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
 
+static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data");
+static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31;
+
 static int show_diag_stat(struct seq_file *m, void *v)
 {
 	struct diag_stat *stat;
@@ -140,20 +151,51 @@ void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
 EXPORT_SYMBOL(diag_stat_inc_norecursion);
 
 /*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data)
+{
+	diag_stat_inc(DIAG_STAT_X00C);
+	diag_amode31_ops.diag0c(virt_to_phys(data));
+}
+
+/*
  * Diagnose 14: Input spool file manipulation
+ *
+ * The subcode parameter determines the type of the first parameter rx.
+ * Currently used are the following 3 subcommands:
+ * 0x0:   Read the Next Spool File Buffer (Data Record)
+ * 0x28:  Position a Spool File to the Designated Record
+ * 0xfff: Retrieve Next File Descriptor
+ *
+ * For subcommands 0x0 and 0xfff, the value of the first parameter is
+ * a virtual address of a memory buffer and needs virtual to physical
+ * address translation. For other subcommands the rx parameter is not
+ * a virtual address.
  */
 int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
 {
 	diag_stat_inc(DIAG_STAT_X014);
+	switch (subcode) {
+	case 0x0:
+	case 0xfff:
+		rx = virt_to_phys((void *)rx);
+		break;
+	default:
+		/* Do nothing */
+		break;
+	}
 	return diag_amode31_ops.diag14(rx, ry1, subcode);
 }
 EXPORT_SYMBOL(diag14);
 
+#define DIAG204_BUSY_RC 8
+
 static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
 {
 	union register_pair rp = { .even = *subcode, .odd = size };
 
-	asm volatile(
+	asm_inline volatile(
 		"	diag	%[addr],%[rp],0x204\n"
 		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
@@ -162,12 +204,35 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad
 	return rp.odd;
 }
 
+/**
+ * diag204() - Issue diagnose 204 call.
+ * @subcode: Subcode of diagnose 204 to be executed.
+ * @size: Size of area in pages which @addr points to, if given.
+ * @addr: Vmalloc'ed memory area where the result is written to.
+ *
+ * Execute diagnose 204 with the given subcode and write the result to the
+ * memory area specified with @addr. For subcodes which do not write a
+ * result to memory both @size and @addr must be zero. If @addr is
+ * specified it must be page aligned and must have been allocated with
+ * vmalloc(). Conversion to real / physical addresses will be handled by
+ * this function if required.
+ */
 int diag204(unsigned long subcode, unsigned long size, void *addr)
 {
+	if (addr) {
+		if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
+			return -EINVAL;
+		if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
+			return -EINVAL;
+	}
+	if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
+		addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
 	diag_stat_inc(DIAG_STAT_X204);
 	size = __diag204(&subcode, size, addr);
-	if (subcode)
-		return -1;
+	if (subcode == DIAG204_BUSY_RC)
+		return -EBUSY;
+	else if (subcode)
+		return -EOPNOTSUPP;
 	return size;
 }
 EXPORT_SYMBOL(diag204);
@@ -194,17 +259,41 @@ int diag210(struct diag210 *addr)
 }
 EXPORT_SYMBOL(diag210);
 
+/*
+ * Diagnose 8C: Access 3270 Display Device Information
+ */
+int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
+{
+	static DEFINE_SPINLOCK(diag8c_lock);
+	unsigned long flags;
+	int ccode;
+
+	spin_lock_irqsave(&diag8c_lock, flags);
+
+	diag_stat_inc(DIAG_STAT_X08C);
+	ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr));
+
+	*addr = *__diag8c_tmp_amode31;
+	spin_unlock_irqrestore(&diag8c_lock, flags);
+
+	return ccode;
+}
+EXPORT_SYMBOL(diag8c);
+
 int diag224(void *ptr)
 {
+	unsigned long addr = __pa(ptr);
 	int rc = -EOPNOTSUPP;
 
 	diag_stat_inc(DIAG_STAT_X224);
-	asm volatile(
-		"	diag	%1,%2,0x224\n"
-		"0:	lhi	%0,0x0\n"
+	asm_inline volatile("\n"
+		"	diag	%[type],%[addr],0x224\n"
+		"0:	lhi	%[rc],0\n"
 		"1:\n"
 		EX_TABLE(0b,1b)
-		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+		: [rc] "+d" (rc)
+		, "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr)
+		: [type] "d" (0), [addr] "d" (addr));
 	return rc;
 }
 EXPORT_SYMBOL(diag224);
@@ -215,6 +304,21 @@ EXPORT_SYMBOL(diag224);
 int diag26c(void *req, void *resp, enum diag26c_sc subcode)
 {
 	diag_stat_inc(DIAG_STAT_X26C);
-	return diag_amode31_ops.diag26c(req, resp, subcode);
+	return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
 }
 EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+	int cc;
+
+	diag_stat_inc(DIAG_STAT_X49C);
+	asm volatile(
+		"	diag	%[subcode],0,0x49c\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		: [subcode] "d" (subcode)
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
new file mode 100644
index 000000000000..d6a34454aa5a
--- /dev/null
+++ b/arch/s390/kernel/diag/diag310.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request memory topology information via diag0x310.
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+#define DIAG310_LEVELMIN 1
+#define DIAG310_LEVELMAX 6
+
+enum diag310_sc {
+	DIAG310_SUBC_0 = 0,
+	DIAG310_SUBC_1 = 1,
+	DIAG310_SUBC_4 = 4,
+	DIAG310_SUBC_5 = 5
+};
+
+enum diag310_retcode {
+	DIAG310_RET_SUCCESS	= 0x0001,
+	DIAG310_RET_BUSY	= 0x0101,
+	DIAG310_RET_OPNOTSUPP	= 0x0102,
+	DIAG310_RET_SC4_INVAL	= 0x0401,
+	DIAG310_RET_SC4_NODATA	= 0x0402,
+	DIAG310_RET_SC5_INVAL	= 0x0501,
+	DIAG310_RET_SC5_NODATA	= 0x0502,
+	DIAG310_RET_SC5_ESIZE	= 0x0503
+};
+
+union diag310_response {
+	u64 response;
+	struct {
+		u64 result	: 32;
+		u64		: 16;
+		u64 rc		: 16;
+	};
+};
+
+union diag310_req_subcode {
+	u64 subcode;
+	struct {
+		u64		: 48;
+		u64 st		: 8;
+		u64 sc		: 8;
+	};
+};
+
+union diag310_req_size {
+	u64 size;
+	struct {
+		u64 page_count	: 32;
+		u64		: 32;
+	};
+};
+
+static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr)
+{
+	union register_pair rp = { .even = (unsigned long)addr, .odd = size };
+
+	diag_stat_inc(DIAG_STAT_X310);
+	asm volatile("diag	%[rp],%[subcode],0x310\n"
+		     : [rp] "+d" (rp.pair)
+		     : [subcode] "d" (subcode)
+		     : "memory");
+	return rp.odd;
+}
+
+static int diag310_result_to_errno(unsigned int result)
+{
+	switch (result) {
+	case DIAG310_RET_BUSY:
+		return -EBUSY;
+	case DIAG310_RET_OPNOTSUPP:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int diag310_get_subcode_mask(unsigned long *mask)
+{
+	union diag310_response res;
+
+	res.response = diag310(DIAG310_SUBC_0, 0, NULL);
+	if (res.rc != DIAG310_RET_SUCCESS)
+		return diag310_result_to_errno(res.rc);
+	*mask = res.response;
+	return 0;
+}
+
+static int diag310_get_memtop_stride(unsigned long *stride)
+{
+	union diag310_response res;
+
+	res.response = diag310(DIAG310_SUBC_1, 0, NULL);
+	if (res.rc != DIAG310_RET_SUCCESS)
+		return diag310_result_to_errno(res.rc);
+	*stride = res.result;
+	return 0;
+}
+
+static int diag310_get_memtop_size(unsigned long *pages, unsigned long level)
+{
+	union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level };
+	union diag310_response res;
+
+	res.response = diag310(req.subcode, 0, NULL);
+	switch (res.rc) {
+	case DIAG310_RET_SUCCESS:
+		*pages = res.result;
+		return 0;
+	case DIAG310_RET_SC4_NODATA:
+		return -ENODATA;
+	case DIAG310_RET_SC4_INVAL:
+		return -EINVAL;
+	default:
+		return diag310_result_to_errno(res.rc);
+	}
+}
+
+static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level)
+{
+	union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level };
+	union diag310_req_size req_size = { .page_count = pages };
+	union diag310_response res;
+
+	res.response = diag310(req_sc.subcode, req_size.size, buf);
+	switch (res.rc) {
+	case DIAG310_RET_SUCCESS:
+		return 0;
+	case DIAG310_RET_SC5_NODATA:
+		return -ENODATA;
+	case DIAG310_RET_SC5_ESIZE:
+		return -EOVERFLOW;
+	case DIAG310_RET_SC5_INVAL:
+		return -EINVAL;
+	default:
+		return diag310_result_to_errno(res.rc);
+	}
+}
+
+static int diag310_check_features(void)
+{
+	static int features_available;	/* set once subcodes 1, 4 and 5 were all found */
+	unsigned long mask;
+	int rc;
+
+	if (READ_ONCE(features_available))	/* earlier successful probe: skip the diag */
+		return 0;
+	if (!sclp.has_diag310)
+		return -EOPNOTSUPP;
+	rc = diag310_get_subcode_mask(&mask);
+	if (rc)
+		return rc;
+	if (!test_bit_inv(DIAG310_SUBC_1, &mask))
+		return -EOPNOTSUPP;
+	if (!test_bit_inv(DIAG310_SUBC_4, &mask))
+		return -EOPNOTSUPP;
+	if (!test_bit_inv(DIAG310_SUBC_5, &mask))
+		return -EOPNOTSUPP;
+	WRITE_ONCE(features_available, 1);	/* only success is cached; failures probe again */
+	return 0;
+}
+
+static int memtop_get_stride_len(unsigned long *res)
+{
+	static unsigned long memtop_stride;
+	unsigned long stride;
+	int rc;
+
+	stride = READ_ONCE(memtop_stride);
+	if (!stride) {
+		rc = diag310_get_memtop_stride(&stride);
+		if (rc)
+			return rc;
+		WRITE_ONCE(memtop_stride, stride);
+	}
+	*res = stride;
+	return 0;
+}
+
+static int memtop_get_page_count(unsigned long *res, unsigned long level)
+{
+	static unsigned long memtop_pages[DIAG310_LEVELMAX];
+	unsigned long pages;
+	int rc;
+
+	if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN)
+		return -EINVAL;
+	pages = READ_ONCE(memtop_pages[level - 1]);
+	if (!pages) {
+		rc = diag310_get_memtop_size(&pages, level);
+		if (rc)
+			return rc;
+		WRITE_ONCE(memtop_pages[level - 1], pages);
+	}
+	*res = pages;
+	return 0;
+}
+
+long diag310_memtop_stride(unsigned long arg)
+{
+	size_t __user *argp = (void __user *)arg;
+	unsigned long stride;
+	int rc;
+
+	rc = diag310_check_features();
+	if (rc)
+		return rc;
+	rc = memtop_get_stride_len(&stride);
+	if (rc)
+		return rc;
+	if (put_user(stride, argp))
+		return -EFAULT;
+	return 0;
+}
+
+long diag310_memtop_len(unsigned long arg)
+{
+	size_t __user *argp = (void __user *)arg;
+	unsigned long pages, level;
+	int rc;
+
+	rc = diag310_check_features();
+	if (rc)
+		return rc;
+	if (get_user(level, argp))
+		return -EFAULT;
+	rc = memtop_get_page_count(&pages, level);
+	if (rc)
+		return rc;
+	if (put_user(pages * PAGE_SIZE, argp))
+		return -EFAULT;
+	return 0;
+}
+
+long diag310_memtop_buf(unsigned long arg)
+{
+	struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg;
+	unsigned long level, pages, data_size;
+	u64 address;
+	void *buf;
+	int rc;
+
+	rc = diag310_check_features();
+	if (rc)
+		return rc;
+	if (get_user(level, &udata->nesting_lvl))
+		return -EFAULT;
+	if (get_user(address, &udata->address))
+		return -EFAULT;
+	rc = memtop_get_page_count(&pages, level);
+	if (rc)
+		return rc;
+	data_size = pages * PAGE_SIZE;
+	buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+			     NUMA_NO_NODE, __builtin_return_address(0));
+	if (!buf)
+		return -ENOMEM;
+	rc = diag310_store_topology_map(buf, pages, level);
+	if (rc)
+		goto out;
+	if (copy_to_user((void __user *)address, buf, data_size))
+		rc = -EFAULT;
+out:
+	vfree(buf);
+	return rc;
+}
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
new file mode 100644
index 000000000000..7fa4c0b7eb6c
--- /dev/null
+++ b/arch/s390/kernel/diag/diag324.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request power readings for resources in a computing environment via
+ * diag 0x324. diag 0x324 stores the power readings in the power information
+ * block (pib).
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt)	"diag324: " fmt
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <asm/timex.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+enum subcode {
+	DIAG324_SUBC_0 = 0,
+	DIAG324_SUBC_1 = 1,
+	DIAG324_SUBC_2 = 2,
+};
+
+enum retcode {
+	DIAG324_RET_SUCCESS		= 0x0001,
+	DIAG324_RET_SUBC_NOTAVAIL	= 0x0103,
+	DIAG324_RET_INSUFFICIENT_SIZE	= 0x0104,
+	DIAG324_RET_READING_UNAVAILABLE	= 0x0105,
+};
+
+union diag324_response {
+	u64 response;
+	struct {
+		u64 installed	: 32;
+		u64		: 16;
+		u64 rc		: 16;
+	} sc0;
+	struct {
+		u64 format	: 16;
+		u64		: 16;
+		u64 pib_len	: 16;
+		u64 rc		: 16;
+	} sc1;
+	struct {
+		u64		: 48;
+		u64 rc		: 16;
+	} sc2;
+};
+
+union diag324_request {
+	u64 request;
+	struct {
+		u64		: 32;
+		u64 allocated	: 16;
+		u64		: 12;
+		u64 sc		: 4;
+	} sc2;
+};
+
+struct pib {
+	u32		: 8;
+	u32 num		: 8;
+	u32 len		: 16;
+	u32		: 24;
+	u32 hlen	: 8;
+	u64		: 64;
+	u64 intv;
+	u8  r[];
+} __packed;
+
+struct pibdata {
+	struct pib *pib;
+	ktime_t expire;
+	u64 sequence;
+	size_t len;
+	int rc;
+};
+
+static DEFINE_MUTEX(pibmutex);
+static struct pibdata pibdata;
+
+#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
+
+static void pibwork_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
+
+static unsigned long diag324(unsigned long subcode, void *addr)
+{
+	union register_pair rp = { .even = (unsigned long)addr };
+
+	diag_stat_inc(DIAG_STAT_X324);
+	asm volatile("diag	%[rp],%[subcode],0x324\n"
+		     : [rp] "+d" (rp.pair)
+		     : [subcode] "d" (subcode)
+		     : "memory");
+	return rp.odd;
+}
+
+static void pibwork_handler(struct work_struct *work)
+{
+	struct pibdata *data = &pibdata;
+	ktime_t timedout;
+
+	mutex_lock(&pibmutex);
+	timedout = ktime_add_ns(data->expire, PIBWORK_DELAY);	/* expire + PIBWORK_DELAY */
+	if (ktime_before(ktime_get(), timedout)) {
+		mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+		goto out;	/* not timed out yet: keep the buffer and check again later */
+	}
+	vfree(data->pib);
+	data->pib = NULL;	/* diag324_pibbuf() allocates a new buffer when it sees NULL */
+out:
+	mutex_unlock(&pibmutex);
+}
+
+static void pib_update(struct pibdata *data)
+{
+	union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
+	union diag324_response res;
+	int rc;
+
+	memset(data->pib, 0, data->len);
+	res.response = diag324(req.request, data->pib);
+	switch (res.sc2.rc) {
+	case DIAG324_RET_SUCCESS:
+		rc = 0;
+		break;
+	case DIAG324_RET_SUBC_NOTAVAIL:
+		rc = -ENOENT;
+		break;
+	case DIAG324_RET_INSUFFICIENT_SIZE:
+		rc = -EMSGSIZE;
+		break;
+	case DIAG324_RET_READING_UNAVAILABLE:
+		rc = -EBUSY;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+	data->rc = rc;
+}
+
+long diag324_pibbuf(unsigned long arg)
+{
+	struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
+	struct pibdata *data = &pibdata;
+	static bool first = true;	/* forces a pib_update() on the very first call */
+	u64 address;
+	int rc;
+
+	if (!data->len)	/* in this file len is only set at the end of diag324_init() */
+		return -EOPNOTSUPP;
+	if (get_user(address, &udata->address))
+		return -EFAULT;
+	mutex_lock(&pibmutex);
+	rc = -ENOMEM;
+	if (!data->pib)
+		data->pib = vmalloc(data->len);	/* released again by pibwork_handler() */
+	if (!data->pib)
+		goto out;
+	if (first || ktime_after(ktime_get(), data->expire)) {
+		pib_update(data);	/* stores its result in data->rc */
+		data->sequence++;
+		data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
+		mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+		first = false;
+	}
+	rc = data->rc;
+	if (rc != 0 && rc != -EBUSY)	/* -EBUSY falls through: pib is still copied out */
+		goto out;
+	rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
+	rc |= put_user(data->sequence, &udata->sequence);
+	if (rc)	/* NOTE(review): a -EBUSY from above ends as 0 on success - intended? */
+		rc = -EFAULT;
+out:
+	mutex_unlock(&pibmutex);
+	return rc;
+}
+
+long diag324_piblen(unsigned long arg)
+{
+	struct pibdata *data = &pibdata;
+
+	if (!data->len)
+		return -EOPNOTSUPP;
+	if (put_user(data->len, (size_t __user *)arg))
+		return -EFAULT;
+	return 0;
+}
+
+static int __init diag324_init(void)
+{
+	union diag324_response res;
+	unsigned long installed;
+
+	if (!sclp.has_diag324)
+		return -EOPNOTSUPP;
+	res.response = diag324(DIAG324_SUBC_0, NULL);
+	if (res.sc0.rc != DIAG324_RET_SUCCESS)
+		return -EOPNOTSUPP;
+	installed = res.response;
+	if (!test_bit_inv(DIAG324_SUBC_1, &installed))
+		return -EOPNOTSUPP;
+	if (!test_bit_inv(DIAG324_SUBC_2, &installed))
+		return -EOPNOTSUPP;
+	res.response = diag324(DIAG324_SUBC_1, NULL);
+	if (res.sc1.rc != DIAG324_RET_SUCCESS)
+		return -EOPNOTSUPP;
+	pibdata.len = res.sc1.pib_len;
+	return 0;
+}
+device_initcall(diag324_init);
diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h
new file mode 100644
index 000000000000..7080be946785
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DIAG_IOCTL_H
+#define _DIAG_IOCTL_H
+
+#include <linux/types.h>
+
+long diag324_pibbuf(unsigned long arg);
+long diag324_piblen(unsigned long arg);
+
+long diag310_memtop_stride(unsigned long arg);
+long diag310_memtop_len(unsigned long arg);
+long diag310_memtop_buf(unsigned long arg);
+
+#endif /* _DIAG_IOCTL_H */
diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c
new file mode 100644
index 000000000000..efffe02ea02e
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_misc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide diagnose information via misc device /dev/diag.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	long rc;
+
+	switch (cmd) {
+	case DIAG324_GET_PIBLEN:
+		rc = diag324_piblen(arg);
+		break;
+	case DIAG324_GET_PIBBUF:
+		rc = diag324_pibbuf(arg);
+		break;
+	case DIAG310_GET_STRIDE:
+		rc = diag310_memtop_stride(arg);
+		break;
+	case DIAG310_GET_MEMTOPLEN:
+		rc = diag310_memtop_len(arg);
+		break;
+	case DIAG310_GET_MEMTOPBUF:
+		rc = diag310_memtop_buf(arg);
+		break;
+	default:
+		rc = -ENOIOCTLCMD;
+		break;
+	}
+	return rc;
+}
+
+static const struct file_operations fops = {
+	.owner		= THIS_MODULE,
+	.open		= nonseekable_open,
+	.unlocked_ioctl	= diag_ioctl,
+};
+
+static struct miscdevice diagdev = {
+	.name	= "diag",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &fops,
+	.mode	= 0444,
+};
+
+static int __init diag_init(void)
+{
+	return misc_register(&diagdev);	/* registers the "diag" misc device at initcall time */
+}
+
+device_initcall(diag_init);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 90bbb4ea1d08..94eb8168ea44 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -24,8 +24,8 @@
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
 #include <linux/atomic.h>
+#include <linux/io.h>
 #include <asm/dis.h>
-#include <asm/io.h>
 #include <asm/cpcmd.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>
@@ -122,6 +122,7 @@ enum {
 	U8_32,	/* 8 bit unsigned value starting at 32 */
 	U12_16, /* 12 bit unsigned value starting at 16 */
 	U16_16, /* 16 bit unsigned value starting at 16 */
+	U16_20, /* 16 bit unsigned value starting at 20 */
 	U16_32, /* 16 bit unsigned value starting at 32 */
 	U32_16, /* 32 bit unsigned value starting at 16 */
 	VX_12,	/* Vector index register starting at position 12 */
@@ -184,6 +185,7 @@ static const struct s390_operand operands[] = {
 	[U8_32]	 = {  8, 32, 0 },
 	[U12_16] = { 12, 16, 0 },
 	[U16_16] = { 16, 16, 0 },
+	[U16_20] = { 16, 20, 0 },
 	[U16_32] = { 16, 32, 0 },
 	[U32_16] = { 32, 16, 0 },
 	[VX_12]	 = {  4, 12, OPERAND_INDEX | OPERAND_VR },
@@ -257,7 +259,6 @@ static const unsigned char formats[][6] = {
 	[INSTR_RSL_R0RD]     = { D_20, L4_8, B_16, 0, 0, 0 },
 	[INSTR_RSY_AARD]     = { A_8, A_12, D20_20, B_16, 0, 0 },
 	[INSTR_RSY_CCRD]     = { C_8, C_12, D20_20, B_16, 0, 0 },
-	[INSTR_RSY_RDRU]     = { R_8, D20_20, B_16, U4_12, 0, 0 },
 	[INSTR_RSY_RRRD]     = { R_8, R_12, D20_20, B_16, 0, 0 },
 	[INSTR_RSY_RURD]     = { R_8, U4_12, D20_20, B_16, 0, 0 },
 	[INSTR_RSY_RURD2]    = { R_8, D20_20, B_16, U4_12, 0, 0 },
@@ -300,14 +301,17 @@ static const unsigned char formats[][6] = {
 	[INSTR_VRI_V0UU2]    = { V_8, U16_16, U4_32, 0, 0, 0 },
 	[INSTR_VRI_V0UUU]    = { V_8, U8_16, U8_24, U4_32, 0, 0 },
 	[INSTR_VRI_VR0UU]    = { V_8, R_12, U8_28, U4_24, 0, 0 },
+	[INSTR_VRI_VV0UU]    = { V_8, V_12, U8_28, U4_24, 0, 0 },
 	[INSTR_VRI_VVUU]     = { V_8, V_12, U16_16, U4_32, 0, 0 },
 	[INSTR_VRI_VVUUU]    = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
 	[INSTR_VRI_VVUUU2]   = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
 	[INSTR_VRI_VVV0U]    = { V_8, V_12, V_16, U8_24, 0, 0 },
 	[INSTR_VRI_VVV0UU]   = { V_8, V_12, V_16, U8_24, U4_32, 0 },
 	[INSTR_VRI_VVV0UU2]  = { V_8, V_12, V_16, U8_28, U4_24, 0 },
-	[INSTR_VRR_0V]	     = { V_12, 0, 0, 0, 0, 0 },
+	[INSTR_VRI_VVV0UV]   = { V_8, V_12, V_16, V_32, U8_24, 0 },
+	[INSTR_VRR_0V0U]     = { V_12, U16_20, 0, 0, 0, 0 },
 	[INSTR_VRR_0VV0U]    = { V_12, V_16, U4_24, 0, 0, 0 },
+	[INSTR_VRR_0VVU]     = { V_12, V_16, U16_20, 0, 0, 0 },
 	[INSTR_VRR_RV0UU]    = { R_8, V_12, U4_24, U4_28, 0, 0 },
 	[INSTR_VRR_VRR]	     = { V_8, R_12, R_16, 0, 0, 0 },
 	[INSTR_VRR_VV]	     = { V_8, V_12, 0, 0, 0, 0 },
@@ -455,21 +459,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
 			if (separator)
 				ptr += sprintf(ptr, "%c", separator);
 			if (operand->flags & OPERAND_GPR)
-				ptr += sprintf(ptr, "%%r%i", value);
+				ptr += sprintf(ptr, "%%r%u", value);
 			else if (operand->flags & OPERAND_FPR)
-				ptr += sprintf(ptr, "%%f%i", value);
+				ptr += sprintf(ptr, "%%f%u", value);
 			else if (operand->flags & OPERAND_AR)
-				ptr += sprintf(ptr, "%%a%i", value);
+				ptr += sprintf(ptr, "%%a%u", value);
 			else if (operand->flags & OPERAND_CR)
-				ptr += sprintf(ptr, "%%c%i", value);
+				ptr += sprintf(ptr, "%%c%u", value);
 			else if (operand->flags & OPERAND_VR)
-				ptr += sprintf(ptr, "%%v%i", value);
+				ptr += sprintf(ptr, "%%v%u", value);
 			else if (operand->flags & OPERAND_PCREL) {
 				void *pcrel = (void *)((int)value + addr);
 
 				ptr += sprintf(ptr, "%px", pcrel);
 			} else if (operand->flags & OPERAND_SIGNED)
-				ptr += sprintf(ptr, "%i", value);
+				ptr += sprintf(ptr, "%i", (int)value);
 			else
 				ptr += sprintf(ptr, "%u", value);
 			if (operand->flags & OPERAND_DISP)
@@ -516,7 +520,7 @@ void show_code(struct pt_regs *regs)
 		if (copy_from_regs(regs, code + end, (void *)addr, 2))
 			break;
 	}
-	/* Code snapshot useable ? */
+	/* Code snapshot usable ? */
 	if ((regs->psw.addr & 1) || start >= end) {
 		printk("%s Code: Bad PSW.\n", mode);
 		return;
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 1e3233eb510a..dd410962ecbe 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
+#include <asm/asm-offsets.h>
 #include <asm/processor.h>
 #include <asm/debug.h>
 #include <asm/dis.h>
@@ -41,60 +42,50 @@ const char *stack_type_name(enum stack_type type)
 EXPORT_SYMBOL_GPL(stack_type_name);
 
 static inline bool in_stack(unsigned long sp, struct stack_info *info,
-			    enum stack_type type, unsigned long low,
-			    unsigned long high)
+			    enum stack_type type, unsigned long stack)
 {
-	if (sp < low || sp >= high)
+	if (sp < stack || sp >= stack + THREAD_SIZE)
 		return false;
 	info->type = type;
-	info->begin = low;
-	info->end = high;
+	info->begin = stack;
+	info->end = stack + THREAD_SIZE;
 	return true;
 }
 
 static bool in_task_stack(unsigned long sp, struct task_struct *task,
 			  struct stack_info *info)
 {
-	unsigned long stack;
+	unsigned long stack = (unsigned long)task_stack_page(task);
 
-	stack = (unsigned long) task_stack_page(task);
-	return in_stack(sp, info, STACK_TYPE_TASK, stack, stack + THREAD_SIZE);
+	return in_stack(sp, info, STACK_TYPE_TASK, stack);
 }
 
 static bool in_irq_stack(unsigned long sp, struct stack_info *info)
 {
-	unsigned long frame_size, top;
+	unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET;
 
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	top = S390_lowcore.async_stack + frame_size;
-	return in_stack(sp, info, STACK_TYPE_IRQ, top - THREAD_SIZE, top);
+	return in_stack(sp, info, STACK_TYPE_IRQ, stack);
 }
 
 static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
 {
-	unsigned long frame_size, top;
+	unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET;
 
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	top = S390_lowcore.nodat_stack + frame_size;
-	return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top);
+	return in_stack(sp, info, STACK_TYPE_NODAT, stack);
 }
 
 static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
 {
-	unsigned long frame_size, top;
+	unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET;
 
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	top = S390_lowcore.mcck_stack + frame_size;
-	return in_stack(sp, info, STACK_TYPE_MCCK, top - THREAD_SIZE, top);
+	return in_stack(sp, info, STACK_TYPE_MCCK, stack);
 }
 
 static bool in_restart_stack(unsigned long sp, struct stack_info *info)
 {
-	unsigned long frame_size, top;
+	unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET;
 
-	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-	top = S390_lowcore.restart_stack + frame_size;
-	return in_stack(sp, info, STACK_TYPE_RESTART, top - THREAD_SIZE, top);
+	return in_stack(sp, info, STACK_TYPE_RESTART, stack);
 }
 
 int get_stack_info(unsigned long sp, struct task_struct *task,
@@ -152,7 +143,13 @@ void show_stack(struct task_struct *task, unsigned long *stack,
 static void show_last_breaking_event(struct pt_regs *regs)
 {
 	printk("Last Breaking-Event-Address:\n");
-	printk(" [<%016lx>] %pSR\n", regs->last_break, (void *)regs->last_break);
+	printk(" [<%016lx>] ", regs->last_break);
+	if (user_mode(regs)) {
+		print_vma_addr(KERN_CONT, regs->last_break);
+		pr_cont("\n");
+	} else {
+		pr_cont("%pSR\n", (void *)regs->last_break);
+	}
 }
 
 void show_registers(struct pt_regs *regs)
@@ -202,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str)
 	console_verbose();
 	spin_lock_irq(&die_lock);
 	bust_spinlocks(1);
-	printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+	printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff,
 	       regs->int_code >> 17, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	pr_cont("PREEMPT ");
-#elif defined(CONFIG_PREEMPT_RT)
-	pr_cont("PREEMPT_RT ");
-#endif
 	pr_cont("SMP ");
 	if (debug_pagealloc_enabled())
 		pr_cont("DEBUG_PAGEALLOC");
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 432c8c987256..54cf0923050f 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -7,6 +7,8 @@
 #define KMSG_COMPONENT "setup"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/sched/debug.h>
+#include <linux/cpufeature.h>
 #include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/errno.h>
@@ -18,8 +20,13 @@
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <asm/asm-extable.h>
+#include <linux/memblock.h>
+#include <asm/access-regs.h>
+#include <asm/asm-offsets.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
+#include <asm/fpu.h>
 #include <asm/ipl.h>
 #include <asm/lowcore.h>
 #include <asm/processor.h>
@@ -30,24 +37,36 @@
 #include <asm/sclp.h>
 #include <asm/facility.h>
 #include <asm/boot_data.h>
-#include <asm/switch_to.h>
 #include "entry.h"
 
-int __bootdata(is_full_image);
+#define __decompressor_handled_param(func, param)		\
+static int __init ignore_decompressor_param_##func(char *s)	\
+{								\
+	return 0;						\
+}								\
+early_param(#param, ignore_decompressor_param_##func)
+
+#define decompressor_handled_param(param) __decompressor_handled_param(param, param)
+
+decompressor_handled_param(mem);
+decompressor_handled_param(vmalloc);
+decompressor_handled_param(dfltcc);
+decompressor_handled_param(facilities);
+decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
+__decompressor_handled_param(debug_alternative, debug-alternative);
+#if IS_ENABLED(CONFIG_KVM)
+decompressor_handled_param(prot_virt);
+#endif
 
-static void __init reset_tod_clock(void)
+static void __init kasan_early_init(void)
 {
-	union tod_clock clk;
-
-	if (store_tod_clock_ext_cc(&clk) == 0)
-		return;
-	/* TOD clock not running. Set the clock to Unix Epoch. */
-	if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
-		disabled_wait();
-
-	memset(&tod_clock_base, 0, sizeof(tod_clock_base));
-	tod_clock_base.tod = TOD_UNIX_EPOCH;
-	S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+#ifdef CONFIG_KASAN
+	init_task.kasan_depth = 0;
+	pr_info("KernelAddressSanitizer initialized\n");
+#endif
 }
 
 /*
@@ -68,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void)
 
 static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
 
-static noinline __init void detect_machine_type(void)
-{
-	struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
-
-	/* Check current-configuration-level */
-	if (stsi(NULL, 0, 0, 0) <= 2) {
-		S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
-		return;
-	}
-	/* Get virtual-machine cpu information. */
-	if (stsi(vmms, 3, 2, 2) || !vmms->count)
-		return;
-
-	/* Detect known hypervisors */
-	if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
-	else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
-}
-
 /* Remove leading, trailing and double whitespace. */
 static inline void strim_all(char *str)
 {
@@ -128,9 +127,9 @@ static noinline __init void setup_arch_string(void)
 		strim_all(hvstr);
 	} else {
 		sprintf(hvstr, "%s",
-			MACHINE_IS_LPAR ? "LPAR" :
-			MACHINE_IS_VM ? "z/VM" :
-			MACHINE_IS_KVM ? "KVM" : "unknown");
+			machine_is_lpar() ? "LPAR" :
+			machine_is_vm() ? "z/VM" :
+			machine_is_kvm() ? "KVM" : "unknown");
 	}
 	dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
 }
@@ -139,9 +138,8 @@ static __init void setup_topology(void)
 {
 	int max_mnest;
 
-	if (!test_facility(11))
+	if (!cpu_has_topology())
 		return;
-	S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
 	for (max_mnest = 6; max_mnest > 1; max_mnest--) {
 		if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
 			break;
@@ -149,103 +147,58 @@ static __init void setup_topology(void)
 	topology_max_mnest = max_mnest;
 }
 
-void __do_early_pgm_check(struct pt_regs *regs)
+void __init __do_early_pgm_check(struct pt_regs *regs)
 {
-	if (!fixup_exception(regs))
-		disabled_wait();
+	struct lowcore *lc = get_lowcore();
+	unsigned long ip;
+
+	regs->int_code = lc->pgm_int_code;
+	regs->int_parm_long = lc->trans_exc_code;
+	ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+	/* Monitor Event? Might be a warning */
+	if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+		if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+			return;
+	}
+	if (fixup_exception(regs))
+		return;
+	/*
+	 * Unhandled exception - system cannot continue but try to get some
+	 * helpful messages to the console. Use early_printk() to print
+	 * some basic information in case it is too early for printk().
+	 */
+	register_early_console();
+	early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+		     regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+	show_regs(regs);
+	disabled_wait();
 }
 
 static noinline __init void setup_lowcore_early(void)
 {
+	struct lowcore *lc = get_lowcore();
 	psw_t psw;
 
 	psw.addr = (unsigned long)early_pgm_check_handler;
-	psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
-	if (IS_ENABLED(CONFIG_KASAN))
-		psw.mask |= PSW_MASK_DAT;
-	S390_lowcore.program_new_psw = psw;
-	S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
-}
-
-static noinline __init void setup_facility_list(void)
-{
-	memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
-	if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
-		__clear_facility(82, alt_stfle_fac_list);
-}
-
-static __init void detect_diag9c(void)
-{
-	unsigned int cpu_address;
-	int rc;
-
-	cpu_address = stap();
-	diag_stat_inc(DIAG_STAT_X09C);
-	asm volatile(
-		"	diag	%2,0,0x9c\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
-}
-
-static __init void detect_machine_facilities(void)
-{
-	if (test_facility(8)) {
-		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
-		__ctl_set_bit(0, 23);
-	}
-	if (test_facility(78))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
-	if (test_facility(3))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
-	if (test_facility(50) && test_facility(73)) {
-		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
-		__ctl_set_bit(0, 55);
-	}
-	if (test_facility(51))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
-	if (test_facility(129)) {
-		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
-		__ctl_set_bit(0, 17);
-	}
-	if (test_facility(130) && !noexec_disabled) {
-		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
-		__ctl_set_bit(0, 20);
-	}
-	if (test_facility(133))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
-	if (test_facility(139) && (tod_clock_base.tod >> 63)) {
-		/* Enabled signed clock comparator comparisons */
-		S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
-		clock_comparator_max = -1ULL >> 1;
-		__ctl_set_bit(0, 53);
-	}
-	if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
-		S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
-		/* the control bit is set during PCI initialization */
-	}
+	psw.mask = PSW_KERNEL_BITS;
+	lc->program_new_psw = psw;
+	lc->preempt_count = INIT_PREEMPT_COUNT;
+	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 }
 
 static inline void save_vector_registers(void)
 {
 #ifdef CONFIG_CRASH_DUMP
-	if (test_facility(129))
+	if (cpu_has_vx())
 		save_vx_regs(boot_cpu_vector_save_area);
 #endif
 }
 
-static inline void setup_control_registers(void)
+static inline void setup_low_address_protection(void)
 {
-	unsigned long reg;
-
-	__ctl_store(reg, 0, 0);
-	reg |= CR0_LOW_ADDRESS_PROTECTION;
-	reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
-	reg |= CR0_EXTERNAL_CALL_SUBMASK;
-	__ctl_load(reg, 0, 0);
+	system_ctl_set_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
 }
 
 static inline void setup_access_registers(void)
@@ -255,30 +208,11 @@ static inline void setup_access_registers(void)
 	restore_access_regs(acrs);
 }
 
-static int __init disable_vector_extension(char *str)
-{
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
-	__ctl_clear_bit(0, 17);
-	return 0;
-}
-early_param("novx", disable_vector_extension);
-
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
 static void __init setup_boot_command_line(void)
 {
 	/* copy arch command line */
-	strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
-}
-
-static void __init check_image_bootable(void)
-{
-	if (is_full_image)
-		return;
-
-	sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
-	sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n");
-	sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n");
-	disabled_wait();
+	strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
 }
 
 static void __init sort_amode31_extable(void)
@@ -288,24 +222,18 @@ static void __init sort_amode31_extable(void)
 
 void __init startup_init(void)
 {
-	sclp_early_adjust_va();
-	reset_tod_clock();
-	check_image_bootable();
+	kasan_early_init();
 	time_early_init();
 	init_kernel_storage_key();
 	lockdep_off();
 	sort_amode31_extable();
 	setup_lowcore_early();
-	setup_facility_list();
-	detect_machine_type();
 	setup_arch_string();
 	setup_boot_command_line();
-	detect_diag9c();
-	detect_machine_facilities();
 	save_vector_registers();
 	setup_topology();
 	sclp_early_detect();
-	setup_control_registers();
+	setup_low_address_protection();
 	setup_access_registers();
 	lockdep_on();
 }
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index d9d53f44008a..cefe020a3be3 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -6,6 +6,7 @@
 #include <linux/console.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <asm/setup.h>
 #include <asm/sclp.h>
 
 static void sclp_early_write(struct console *con, const char *s, unsigned int len)
@@ -20,6 +21,16 @@ static struct console sclp_early_console = {
 	.index = -1,
 };
 
+void __init register_early_console(void)
+{
+	if (early_console)
+		return;
+	if (!sclp.has_linemode && !sclp.has_vt220)
+		return;
+	early_console = &sclp_early_console;
+	register_console(early_console);
+}
+
 static int __init setup_early_printk(char *buf)
 {
 	if (early_console)
@@ -27,10 +38,7 @@ static int __init setup_early_printk(char *buf)
 	/* Accept only "earlyprintk" and "earlyprintk=sclp" */
 	if (buf && !str_has_prefix(buf, "sclp"))
 		return 0;
-	if (!sclp.has_linemode && !sclp.has_vt220)
-		return 0;
-	early_console = &sclp_early_console;
-	register_console(early_console);
+	register_early_console();
 	return 0;
 }
 early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
deleted file mode 100644
index f521c6da37b8..000000000000
--- a/arch/s390/kernel/earlypgm.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *    Copyright IBM Corp. 2006, 2007
- *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-ENTRY(early_pgm_check_handler)
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_PSW(16,%r11),__LC_PGM_OLD_PSW
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-	lgr	%r2,%r11
-	brasl	%r14,__do_early_pgm_check
-	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
-	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-	lpswe	__LC_RETURN_PSW
-ENDPROC(early_pgm_check_handler)
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
index 7f8246c9be08..0e51fa537262 100644
--- a/arch/s390/kernel/ebcdic.c
+++ b/arch/s390/kernel/ebcdic.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *    ECBDIC -> ASCII, ASCII -> ECBDIC,
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC,
  *    upper to lower case (EBCDIC) conversion tables.
  *
  *  S390 version
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index d2a1f2f4f5b8..0f00f4b06d51 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -8,10 +8,11 @@
  *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
  */
 
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/asm-extable.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/dwarf.h>
@@ -23,62 +24,51 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu-insn.h>
 #include <asm/setup.h>
 #include <asm/nmi.h>
-#include <asm/export.h>
 #include <asm/nospec-insn.h>
-
-STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER
-STACK_SIZE  = 1 << STACK_SHIFT
-STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+#include <asm/lowcore.h>
+#include <asm/machine.h>
 
 _LPP_OFFSET	= __LC_LPP
 
 	.macro STBEAR address
-	ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+	ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
 	.endm
 
 	.macro LBEAR address
-	ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
-	.endm
-
-	.macro LPSWEY address,lpswe
-	ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+	ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
 	.endm
 
-	.macro MBEAR reg
-	ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+	.macro LPSWEY address, lpswe
+	ALTERNATIVE_2 "b \lpswe;nopr", \
+		".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193),		\
+		__stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0),	\
+		ALT_FEATURE(MFEATURE_LOWCORE)
 	.endm
 
-	.macro	CHECK_STACK savearea
-#ifdef CONFIG_CHECK_STACK
-	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
-	lghi	%r14,\savearea
-	jz	stack_overflow
-#endif
+	.macro MBEAR reg, lowcore
+	ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+		ALT_FACILITY(193)
 	.endm
 
-	.macro	CHECK_VMAP_STACK savearea,oklabel
-#ifdef CONFIG_VMAP_STACK
+	.macro	CHECK_VMAP_STACK savearea, lowcore, oklabel
 	lgr	%r14,%r15
-	nill	%r14,0x10000 - STACK_SIZE
-	oill	%r14,STACK_INIT
-	clg	%r14,__LC_KERNEL_STACK
+	nill	%r14,0x10000 - THREAD_SIZE
+	oill	%r14,STACK_INIT_OFFSET
+	clg	%r14,__LC_KERNEL_STACK(\lowcore)
 	je	\oklabel
-	clg	%r14,__LC_ASYNC_STACK
+	clg	%r14,__LC_ASYNC_STACK(\lowcore)
 	je	\oklabel
-	clg	%r14,__LC_MCCK_STACK
+	clg	%r14,__LC_MCCK_STACK(\lowcore)
 	je	\oklabel
-	clg	%r14,__LC_NODAT_STACK
+	clg	%r14,__LC_NODAT_STACK(\lowcore)
 	je	\oklabel
-	clg	%r14,__LC_RESTART_STACK
+	clg	%r14,__LC_RESTART_STACK(\lowcore)
 	je	\oklabel
-	lghi	%r14,\savearea
-	j	stack_overflow
-#else
-	j	\oklabel
-#endif
+	la	%r14,\savearea(\lowcore)
+	j	stack_invalid
 	.endm
 
 	/*
@@ -104,151 +94,108 @@ _LPP_OFFSET	= __LC_LPP
 	.endm
 
 	.macro BPOFF
-	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
 	.endm
 
 	.macro BPON
-	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+	ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
 	.endm
 
 	.macro BPENTER tif_ptr,tif_mask
 	ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
-		    "j .+12; nop; nop", 82
+		    "j .+12; nop; nop", ALT_SPEC(82)
 	.endm
 
 	.macro BPEXIT tif_ptr,tif_mask
 	TSTMSK	\tif_ptr,\tif_mask
 	ALTERNATIVE "jz .+8;  .insn rrf,0xb2e80000,0,0,12,0", \
-		    "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
-	.endm
-
-	/*
-	 * The CHKSTG macro jumps to the provided label in case the
-	 * machine check interruption code reports one of unrecoverable
-	 * storage errors:
-	 * - Storage error uncorrected
-	 * - Storage key error uncorrected
-	 * - Storage degradation with Failing-storage-address validity
-	 */
-	.macro CHKSTG errlabel
-	TSTMSK	__LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR)
-	jnz	\errlabel
-	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD
-	jz	.Loklabel\@
-	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR
-	jnz	\errlabel
-.Loklabel\@:
+		    "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
 	.endm
 
 #if IS_ENABLED(CONFIG_KVM)
-	/*
-	 * The OUTSIDE macro jumps to the provided label in case the value
-	 * in the provided register is outside of the provided range. The
-	 * macro is useful for checking whether a PSW stored in a register
-	 * pair points inside or outside of a block of instructions.
-	 * @reg: register to check
-	 * @start: start of the range
-	 * @end: end of the range
-	 * @outside_label: jump here if @reg is outside of [@start..@end)
-	 */
-	.macro OUTSIDE reg,start,end,outside_label
-	lgr	%r14,\reg
-	larl	%r13,\start
-	slgr	%r14,%r13
-#ifdef CONFIG_AS_IS_LLVM
-	clgfrl	%r14,.Lrange_size\@
-#else
-	clgfi	%r14,\end - \start
-#endif
-	jhe	\outside_label
-#ifdef CONFIG_AS_IS_LLVM
-	.section .rodata, "a"
-	.align 4
-.Lrange_size\@:
-	.long	\end - \start
-	.previous
-#endif
-	.endm
-
-	.macro SIEEXIT
-	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
-	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
+	.macro SIEEXIT sie_control,lowcore
+	lg	%r9,\sie_control			# get control block pointer
+	ni	__SIE_PROG0C+3(%r9),0xfe		# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE(\lowcore)	# load primary asce
+	lg	%r9,__LC_CURRENT(\lowcore)
+	mvi	__TI_sie(%r9),0
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	.endm
 #endif
 
+	.macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	brasl	%r14,stackleak_erase_on_task_stack
+#endif
+	.endm
+
 	GEN_BR_THUNK %r14
 
 	.section .kprobes.text, "ax"
 .Ldummy:
 	/*
-	 * This nop exists only in order to avoid that __bpon starts at
-	 * the beginning of the kprobes text section. In that case we would
-	 * have several symbols at the same address. E.g. objdump would take
-	 * an arbitrary symbol name when disassembling this code.
-	 * With the added nop in between the __bpon symbol is unique
-	 * again.
+	 * The following nop exists only to prevent the next symbol from
+	 * starting at the beginning of the kprobes text section.
+	 * Otherwise there would be several symbols at the same address,
+	 * and e.g. objdump would pick an arbitrary symbol when disassembling
+	 * the code.
+	 * With the added nop in between this cannot happen.
 	 */
 	nop	0
 
-ENTRY(__bpon)
-	.globl __bpon
-	BPON
-	BR_EX	%r14
-ENDPROC(__bpon)
-
 /*
- * Scheduler resume function, called by switch_to
- *  gpr2 = (task_struct *) prev
- *  gpr3 = (task_struct *) next
+ * Scheduler resume function, called by __switch_to
+ *  gpr2 = (task_struct *)prev
+ *  gpr3 = (task_struct *)next
  * Returns:
  *  gpr2 = prev
  */
-ENTRY(__switch_to)
+SYM_FUNC_START(__switch_to_asm)
 	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
 	lghi	%r4,__TASK_stack
 	lghi	%r1,__TASK_thread
-	llill	%r5,STACK_INIT
+	llill	%r5,STACK_INIT_OFFSET
 	stg	%r15,__THREAD_ksp(%r1,%r2)	# store kernel stack of prev
 	lg	%r15,0(%r4,%r3)			# start of kernel stack of next
 	agr	%r15,%r5			# end of kernel stack of next
-	stg	%r3,__LC_CURRENT		# store task struct of next
-	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
+	GET_LC	%r13
+	stg	%r3,__LC_CURRENT(%r13)		# store task struct of next
+	stg	%r15,__LC_KERNEL_STACK(%r13)	# store end of kernel stack
 	lg	%r15,__THREAD_ksp(%r1,%r3)	# load kernel stack of next
 	aghi	%r3,__TASK_pid
-	mvc	__LC_CURRENT_PID(4,%r0),0(%r3)	# store pid of next
+	mvc	__LC_CURRENT_PID(4,%r13),0(%r3)	# store pid of next
+	ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
-	ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
 	BR_EX	%r14
-ENDPROC(__switch_to)
+SYM_FUNC_END(__switch_to_asm)
 
 #if IS_ENABLED(CONFIG_KVM)
 /*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
+ * __sie64a calling convention:
+ * %r2 pointer to sie control block phys
+ * %r3 pointer to sie control block virt
+ * %r4 guest register save area
+ * %r5 guest asce
  */
-ENTRY(sie64a)
+SYM_FUNC_START(__sie64a)
 	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
-	lg	%r12,__LC_CURRENT
-	stg	%r2,__SF_SIE_CONTROL(%r15)	# save control block pointer
-	stg	%r3,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
+	GET_LC	%r13
+	lg	%r14,__LC_CURRENT(%r13)
+	stg	%r2,__SF_SIE_CONTROL_PHYS(%r15)	# save sie block physical..
+	stg	%r3,__SF_SIE_CONTROL(%r15)	# ...and virtual addresses
+	stg	%r4,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
+	stg	%r5,__SF_SIE_GUEST_ASCE(%r15)	# save guest asce
 	xc	__SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
-	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-	lg	%r14,__LC_GMAP			# get gmap pointer
-	ltgr	%r14,%r14
-	jz	.Lsie_gmap
-	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-.Lsie_gmap:
+	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
+	lmg	%r0,%r13,0(%r4)			# load guest gprs 0-13
+	mvi	__TI_sie(%r14),1
+	lctlg	%c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),3		# last exit...
 	jnz	.Lsie_skip
-	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
-	jo	.Lsie_skip			# exit if fp/vx regs changed
-	BPEXIT	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+	lg	%r14,__SF_SIE_CONTROL_PHYS(%r15)	# get sie block phys addr
+	BPEXIT	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
 .Lsie_entry:
 	sie	0(%r14)
 # Let the next instruction be NOP to avoid triggering a machine check
@@ -256,24 +203,15 @@ ENTRY(sie64a)
 	nopr	7
 .Lsie_leave:
 	BPOFF
-	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
 .Lsie_skip:
+	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
-.Lsie_done:
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
-# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
-# Other instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
-.Lrewind_pad6:
-	nopr	7
-.Lrewind_pad4:
-	nopr	7
-.Lrewind_pad2:
-	nopr	7
-	.globl sie_exit
-sie_exit:
+	GET_LC	%r14
+	lctlg	%c1,%c1,__LC_USER_ASCE(%r14)	# load primary asce
+	lg	%r14,__LC_CURRENT(%r14)
+	mvi	__TI_sie(%r14),0
+SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
 	lg	%r14,__SF_SIE_SAVEAREA(%r15)	# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	xgr	%r0,%r0				# clear guest registers to
@@ -284,17 +222,8 @@ sie_exit:
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
 	lg	%r2,__SF_SIE_REASON(%r15)	# return exit reason code
 	BR_EX	%r14
-.Lsie_fault:
-	lghi	%r14,-EFAULT
-	stg	%r14,__SF_SIE_REASON(%r15)	# set exit reason code
-	j	sie_exit
-
-	EX_TABLE(.Lrewind_pad6,.Lsie_fault)
-	EX_TABLE(.Lrewind_pad4,.Lsie_fault)
-	EX_TABLE(.Lrewind_pad2,.Lsie_fault)
-	EX_TABLE(sie_exit,.Lsie_fault)
-ENDPROC(sie64a)
-EXPORT_SYMBOL(sie64a)
+SYM_FUNC_END(__sie64a)
+EXPORT_SYMBOL(__sie64a)
 EXPORT_SYMBOL(sie_exit)
 #endif
 
@@ -303,19 +232,17 @@ EXPORT_SYMBOL(sie_exit)
  * are entered with interrupts disabled.
  */
 
-ENTRY(system_call)
-	stpt	__LC_SYS_ENTER_TIMER
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+SYM_CODE_START(system_call)
+	STMG_LC	%r8,%r15,__LC_SAVE_AREA
+	GET_LC	%r13
+	stpt	__LC_SYS_ENTER_TIMER(%r13)
 	BPOFF
 	lghi	%r14,0
 .Lsysc_per:
-	STBEAR	__LC_LAST_BREAK
-	lctlg	%c1,%c1,__LC_KERNEL_ASCE
-	lg	%r12,__LC_CURRENT
-	lg	%r15,__LC_KERNEL_STACK
+	STBEAR	__LC_LAST_BREAK(%r13)
+	lg	%r15,__LC_KERNEL_STACK(%r13)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stmg	%r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
 	# clear user controlled register to prevent speculative use
 	xgr	%r0,%r0
 	xgr	%r1,%r1
@@ -328,74 +255,71 @@ ENTRY(system_call)
 	xgr	%r10,%r10
 	xgr	%r11,%r11
 	la	%r2,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-	mvc	__PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
-	MBEAR	%r2
+	mvc	__PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
+	MBEAR	%r2,%r13
 	lgr	%r3,%r14
 	brasl	%r14,__do_syscall
-	lctlg	%c1,%c1,__LC_USER_ASCE
-	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
-	BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+	STACKLEAK_ERASE
+	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	BPON
 	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+	stpt	__LC_EXIT_TIMER(%r13)
 	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-	stpt	__LC_EXIT_TIMER
 	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
-ENDPROC(system_call)
+SYM_CODE_END(system_call)
 
 #
 # a new process exits the kernel with ret_from_fork
 #
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
 	lgr	%r3,%r11
 	brasl	%r14,__ret_from_fork
-	lctlg	%c1,%c1,__LC_USER_ASCE
-	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
-	BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+	STACKLEAK_ERASE
+	GET_LC	%r13
+	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	BPON
 	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+	stpt	__LC_EXIT_TIMER(%r13)
 	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-	stpt	__LC_EXIT_TIMER
 	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
-ENDPROC(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
 
 /*
  * Program check handler routine
  */
 
-ENTRY(pgm_check_handler)
-	stpt	__LC_SYS_ENTER_TIMER
+SYM_CODE_START(pgm_check_handler)
+	STMG_LC	%r8,%r15,__LC_SAVE_AREA
+	GET_LC	%r13
+	stpt	__LC_SYS_ENTER_TIMER(%r13)
 	BPOFF
-	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	lg	%r12,__LC_CURRENT
-	lghi	%r10,0
-	lmg	%r8,%r9,__LC_PGM_OLD_PSW
+	lmg	%r8,%r9,__LC_PGM_OLD_PSW(%r13)
+	xgr	%r10,%r10
 	tmhh	%r8,0x0001		# coming from user space?
-	jno	.Lpgm_skip_asce
-	lctlg	%c1,%c1,__LC_KERNEL_ASCE
-	j	3f			# -> fault in user space
-.Lpgm_skip_asce:
+	jo	3f			# -> fault in user space
 #if IS_ENABLED(CONFIG_KVM)
-	# cleanup critical section for program checks in sie64a
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,1f
-	SIEEXIT
+	lg	%r11,__LC_CURRENT(%r13)
+	tm	__TI_sie(%r11),0xff
+	jz	1f
+	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT __SF_SIE_CONTROL(%r15),%r13
 	lghi	%r10,_PIF_GUEST_FAULT
 #endif
 1:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
 	jnz	2f			# -> enabled, can't be a double fault
-	tm	__LC_PGM_ILC+3,0x80	# check for per exception
+	tm	__LC_PGM_ILC+3(%r13),0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-2:	CHECK_STACK __LC_SAVE_AREA_SYNC
-	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	# CHECK_VMAP_STACK branches to stack_overflow or 4f
-	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-3:	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-	lg	%r15,__LC_KERNEL_STACK
+2:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	# CHECK_VMAP_STACK branches to stack_invalid or 4f
+	CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
+3:	lg	%r15,__LC_KERNEL_STACK(%r13)
 4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stg	%r10,__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
-	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
 	stmg	%r8,%r9,__PT_PSW(%r11)
-
 	# clear user controlled registers to prevent speculative use
 	xgr	%r0,%r0
 	xgr	%r1,%r1
@@ -404,15 +328,16 @@ ENTRY(pgm_check_handler)
 	xgr	%r5,%r5
 	xgr	%r6,%r6
 	xgr	%r7,%r7
+	xgr	%r12,%r12
 	lgr	%r2,%r11
 	brasl	%r14,__do_pgm_check
 	tmhh	%r8,0x0001		# returning to user space?
 	jno	.Lpgm_exit_kernel
-	lctlg	%c1,%c1,__LC_USER_ASCE
-	BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
-	stpt	__LC_EXIT_TIMER
+	STACKLEAK_ERASE
+	BPON
+	stpt	__LC_EXIT_TIMER(%r13)
 .Lpgm_exit_kernel:
-	mvc	__LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
 	LBEAR	STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
 	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
 	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -421,39 +346,38 @@ ENTRY(pgm_check_handler)
 # single stepped system call
 #
 .Lpgm_svcper:
-	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+	mvc	__LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
 	larl	%r14,.Lsysc_per
-	stg	%r14,__LC_RETURN_PSW+8
+	stg	%r14,__LC_RETURN_PSW+8(%r13)
 	lghi	%r14,1
-	LBEAR	__LC_PGM_LAST_BREAK
+	LBEAR	__LC_PGM_LAST_BREAK(%r13)
 	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
-ENDPROC(pgm_check_handler)
+SYM_CODE_END(pgm_check_handler)
 
 /*
  * Interrupt handler macro used for external and IO interrupts.
  */
 .macro INT_HANDLER name,lc_old_psw,handler
-ENTRY(\name)
-	stckf	__LC_INT_CLOCK
-	stpt	__LC_SYS_ENTER_TIMER
-	STBEAR	__LC_LAST_BREAK
+SYM_CODE_START(\name)
+	STMG_LC	%r8,%r15,__LC_SAVE_AREA
+	GET_LC	%r13
+	stckf	__LC_INT_CLOCK(%r13)
+	stpt	__LC_SYS_ENTER_TIMER(%r13)
+	STBEAR	__LC_LAST_BREAK(%r13)
 	BPOFF
-	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
-	lg	%r12,__LC_CURRENT
-	lmg	%r8,%r9,\lc_old_psw
+	lmg	%r8,%r9,\lc_old_psw(%r13)
 	tmhh	%r8,0x0001			# interrupting from user ?
 	jnz	1f
 #if IS_ENABLED(CONFIG_KVM)
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,0f
-	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
-	SIEEXIT
+	lg	%r10,__LC_CURRENT(%r13)
+	tm	__TI_sie(%r10),0xff
+	jz	0f
+	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT __SF_SIE_CONTROL(%r15),%r13
 #endif
-0:	CHECK_STACK __LC_SAVE_AREA_ASYNC
-	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	2f
-1:	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-	lctlg	%c1,%c1,__LC_KERNEL_ASCE
-	lg	%r15,__LC_KERNEL_STACK
+1:	lg	%r15,__LC_KERNEL_STACK(%r13)
 2:	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -467,114 +391,85 @@ ENTRY(\name)
 	xgr	%r7,%r7
 	xgr	%r10,%r10
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
-	MBEAR	%r11
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+	MBEAR	%r11,%r13
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,\handler
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
+	mvc	__LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
 	tmhh	%r8,0x0001		# returning to user ?
 	jno	2f
-	lctlg	%c1,%c1,__LC_USER_ASCE
-	BPEXIT	__TI_flags(%r12),_TIF_ISOLATE_BP
-	stpt	__LC_EXIT_TIMER
+	STACKLEAK_ERASE
+	BPON
+	stpt	__LC_EXIT_TIMER(%r13)
 2:	LBEAR	__PT_LAST_BREAK(%r11)
 	lmg	%r0,%r15,__PT_R0(%r11)
 	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
-ENDPROC(\name)
+SYM_CODE_END(\name)
 .endm
 
+	.section .irqentry.text, "ax"
+
 INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
 INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
 
-/*
- * Load idle PSW.
- */
-ENTRY(psw_idle)
-	stg	%r14,(__SF_GPRS+8*8)(%r15)
-	stg	%r3,__SF_EMPTY(%r15)
-	larl	%r1,psw_idle_exit
-	stg	%r1,__SF_EMPTY+8(%r15)
-	larl	%r1,smp_cpu_mtid
-	llgf	%r1,0(%r1)
-	ltgr	%r1,%r1
-	jz	.Lpsw_idle_stcctm
-	.insn	rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2)
-.Lpsw_idle_stcctm:
-	oi	__LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
-	BPON
-	stckf	__CLOCK_IDLE_ENTER(%r2)
-	stpt	__TIMER_IDLE_ENTER(%r2)
-	lpswe	__SF_EMPTY(%r15)
-.globl psw_idle_exit
-psw_idle_exit:
-	BR_EX	%r14
-ENDPROC(psw_idle)
+	.section .kprobes.text, "ax"
 
 /*
  * Machine check handler routines
  */
-ENTRY(mcck_int_handler)
-	stckf	__LC_MCCK_CLOCK
+SYM_CODE_START(mcck_int_handler)
 	BPOFF
-	la	%r1,4095		# validate r1
-	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# validate cpu timer
-	LBEAR	__LC_LAST_BREAK_SAVE_AREA-4095(%r1)		# validate bear
-	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
-	lg	%r12,__LC_CURRENT
-	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+	GET_LC	%r13
+	lmg	%r8,%r9,__LC_MCK_OLD_PSW(%r13)
+	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
-	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_CR_VALID
+	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
 	jno	.Lmcck_panic		# control registers invalid -> panic
-	la	%r14,4095
-	lctlg	%c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs
 	ptlb
-	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
-	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+	lay	%r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+	mvc	__LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
 	jo	3f
-	la	%r14,__LC_SYS_ENTER_TIMER
-	clc	0(8,%r14),__LC_EXIT_TIMER
+	la	%r14,__LC_SYS_ENTER_TIMER(%r13)
+	clc	0(8,%r14),__LC_EXIT_TIMER(%r13)
 	jl	1f
-	la	%r14,__LC_EXIT_TIMER
-1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER
+	la	%r14,__LC_EXIT_TIMER(%r13)
+1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
 	jl	2f
-	la	%r14,__LC_LAST_UPDATE_TIMER
+	la	%r14,__LC_LAST_UPDATE_TIMER(%r13)
 2:	spt	0(%r14)
-	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-3:	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+	mvc	__LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3:	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
 	jno	.Lmcck_panic
 	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	6f
-	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+	jnz	.Lmcck_user
+	TSTMSK	__LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
 	jno	.Lmcck_panic
 #if IS_ENABLED(CONFIG_KVM)
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,6f
-	OUTSIDE	%r9,.Lsie_entry,.Lsie_leave,4f
-	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-	j	5f
-4:	CHKSTG	.Lmcck_panic
-5:	larl	%r14,.Lstosm_tmp
-	stosm	0(%r14),0x04		# turn dat on, keep irqs off
-	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
-	SIEEXIT
-	j	.Lmcck_stack
+	lg	%r10,__LC_CURRENT(%r13)
+	tm	__TI_sie(%r10),0xff
+	jz	.Lmcck_user
+	# Need to compare the address instead of the __TI_sie flag.
+	# Otherwise there would be a race between setting the flag
+	# and entering SIE (or leaving and clearing the flag). This
+	# would cause machine checks targeted at the guest to be
+	# handled by the host.
+	larl	%r14,.Lsie_entry
+	clgrjl	%r9,%r14, 4f
+	larl	%r14,.Lsie_leave
+	clgrjhe	%r9,%r14, 4f
+	lg	%r10,__LC_PCPU(%r13)
+	oi	__PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
+4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT __SF_SIE_CONTROL(%r15),%r13
 #endif
-6:	CHKSTG	.Lmcck_panic
-	larl	%r14,.Lstosm_tmp
-	stosm	0(%r14),0x04		# turn dat on, keep irqs off
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	.Lmcck_stack
-	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-.Lmcck_stack:
-	lg	%r15,__LC_MCCK_STACK
+.Lmcck_user:
+	lg	%r15,__LC_MCCK_STACK(%r13)
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stctg	%c1,%c1,__PT_CR1(%r11)
-	lctlg	%c1,%c1,__LC_KERNEL_ASCE
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
-	stmg	%r0,%r7,__PT_R0(%r11)
+	lay	%r14,__LC_GPREGS_SAVE_AREA(%r13)
+	mvc	__PT_R0(128,%r11),0(%r14)
 	# clear user controlled registers to prevent speculative use
 	xgr	%r0,%r0
 	xgr	%r1,%r1
@@ -584,30 +479,19 @@ ENTRY(mcck_int_handler)
 	xgr	%r6,%r6
 	xgr	%r7,%r7
 	xgr	%r10,%r10
-	mvc	__PT_R8(64,%r11),0(%r14)
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,s390_do_machine_check
-	cghi	%r2,0
-	je	.Lmcck_return
-	lg	%r1,__LC_KERNEL_STACK	# switch to kernel stack
-	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
-	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
-	la	%r11,STACK_FRAME_OVERHEAD(%r1)
-	lgr	%r2,%r11
-	lgr	%r15,%r1
-	brasl	%r14,s390_handle_mcck
-.Lmcck_return:
-	lctlg	%c1,%c1,__PT_CR1(%r11)
 	lmg	%r0,%r10,__PT_R0(%r11)
-	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
-	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+	mvc	__LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+	tm	__LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
 	jno	0f
-	BPEXIT	__TI_flags(%r12),_TIF_ISOLATE_BP
-	stpt	__LC_EXIT_TIMER
-0:	ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+	BPON
+	stpt	__LC_EXIT_TIMER(%r13)
+0:	ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+		ALT_FACILITY(193)
 	LBEAR	0(%r12)
 	lmg	%r11,%r15,__PT_R11(%r11)
 	LPSWEY	__LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -621,10 +505,10 @@ ENTRY(mcck_int_handler)
 	 */
 	lhi	%r5,0
 	lhi	%r6,1
-	larl	%r7,.Lstop_lock
+	larl	%r7,stop_lock
 	cs	%r5,%r6,0(%r7)		# single CPU-stopper only
 	jnz	4f
-	larl	%r7,.Lthis_cpu
+	larl	%r7,this_cpu
 	stap	0(%r7)			# this CPU address
 	lh	%r4,0(%r7)
 	nilh	%r4,0
@@ -640,26 +524,28 @@ ENTRY(mcck_int_handler)
 3:	sigp	%r1,%r4,SIGP_STOP	# stop this CPU
 	brc	SIGP_CC_BUSY,3b
 4:	j	4b
-ENDPROC(mcck_int_handler)
+SYM_CODE_END(mcck_int_handler)
 
-ENTRY(restart_int_handler)
-	ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+SYM_CODE_START(restart_int_handler)
+	ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
 	stg	%r15,__LC_SAVE_AREA_RESTART
 	TSTMSK	__LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
 	jz	0f
-	la	%r15,4095
-	lctlg	%c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15)
-0:	larl	%r15,.Lstosm_tmp
-	stosm	0(%r15),0x04			# turn dat on, keep irqs off
-	lg	%r15,__LC_RESTART_STACK
+	lctlg	%c0,%c15,__LC_CREGS_SAVE_AREA
+0:	larl	%r15,daton_psw
+	lpswe	0(%r15)				# turn dat on, keep irqs off
+.Ldaton:
+	GET_LC	%r15
+	lg	%r15,__LC_RESTART_STACK(%r15)
 	xc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-	mvc	STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
-	mvc	STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+	GET_LC	%r13
+	mvc	STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+	mvc	STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
 	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
-	lg	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
-	lg	%r2,__LC_RESTART_DATA
-	lgf	%r3,__LC_RESTART_SOURCE
+	lg	%r1,__LC_RESTART_FN(%r13)	# load fn, parm & source cpu
+	lg	%r2,__LC_RESTART_DATA(%r13)
+	lgf	%r3,__LC_RESTART_SOURCE(%r13)
 	ltgr	%r3,%r3				# test source cpu address
 	jm	1f				# negative -> skip source stop
 0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
@@ -670,46 +556,70 @@ ENTRY(restart_int_handler)
 2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
 	brc	2,2b
 3:	j	3b
-ENDPROC(restart_int_handler)
+SYM_CODE_END(restart_int_handler)
+
+	__INIT
+SYM_CODE_START(early_pgm_check_handler)
+	STMG_LC %r8,%r15,__LC_SAVE_AREA
+	GET_LC	%r13
+	aghi	%r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+	lgr	%r2,%r11
+	brasl	%r14,__do_early_pgm_check
+	mvc	__LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+	lmg	%r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	LPSWEY	__LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+	__FINIT
 
 	.section .kprobes.text, "ax"
 
-#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
 /*
- * The synchronous or the asynchronous stack overflowed. We are dead.
+ * The synchronous or the asynchronous stack pointer is invalid. We are dead.
  * No need to properly save the registers, we are going to panic anyway.
  * Setup a pt_regs so that show_trace can provide a good call trace.
  */
-ENTRY(stack_overflow)
-	lg	%r15,__LC_NODAT_STACK	# change to panic stack
+SYM_CODE_START(stack_invalid)
+	GET_LC	%r15
+	lg	%r15,__LC_NODAT_STACK(%r15) # change to panic stack
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_R8(64,%r11),0(%r14)
-	stg	%r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+	GET_LC	%r2
+	mvc	__PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
-	jg	kernel_stack_overflow
-ENDPROC(stack_overflow)
-#endif
+	jg	kernel_stack_invalid
+SYM_CODE_END(stack_invalid)
 
 	.section .data, "aw"
-		.align	4
-.Lstop_lock:	.long	0
-.Lthis_cpu:	.short	0
-.Lstosm_tmp:	.byte	0
+	.balign	4
+SYM_DATA_LOCAL(stop_lock,	.long 0)
+SYM_DATA_LOCAL(this_cpu,	.short 0)
+	.balign	8
+SYM_DATA_START_LOCAL(daton_psw)
+	.quad	PSW_KERNEL_BITS
+	.quad	.Ldaton
+SYM_DATA_END(daton_psw)
+
 	.section .rodata, "a"
+	.balign	8
 #define SYSCALL(esame,emu)	.quad __s390x_ ## esame
-	.globl	sys_call_table
-sys_call_table:
-#include "asm/syscall_table.h"
+SYM_DATA_START(sys_call_table)
+#include <asm/syscall_table.h>
+SYM_DATA_END(sys_call_table)
 #undef SYSCALL
 
 #ifdef CONFIG_COMPAT
 
 #define SYSCALL(esame,emu)	.quad __s390_ ## emu
-	.globl	sys_call_table_emu
-sys_call_table_emu:
-#include "asm/syscall_table.h"
+SYM_DATA_START(sys_call_table_emu)
+#include <asm/syscall_table.h>
+SYM_DATA_END(sys_call_table_emu)
 #undef SYSCALL
 #endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 995ec7449feb..dd55cc6bbc28 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -19,6 +19,7 @@ void mcck_int_handler(void);
 void restart_int_handler(void);
 void early_pgm_check_handler(void);
 
+struct task_struct *__switch_to_asm(struct task_struct *prev, struct task_struct *next);
 void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
 void __do_pgm_check(struct pt_regs *regs);
 void __do_syscall(struct pt_regs *regs, int per_trap);
@@ -30,19 +31,16 @@ void do_secure_storage_access(struct pt_regs *regs);
 void do_non_secure_storage_access(struct pt_regs *regs);
 void do_secure_storage_violation(struct pt_regs *regs);
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
-void kernel_stack_overflow(struct pt_regs * regs);
+void kernel_stack_invalid(struct pt_regs *regs);
 void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 		     struct pt_regs *regs);
 
-void __init init_IRQ(void);
 void do_io_irq(struct pt_regs *regs);
 void do_ext_irq(struct pt_regs *regs);
 void do_restart(void *arg);
 void __init startup_init(void);
 void die(struct pt_regs *regs, const char *str);
 int setup_profiling_timer(unsigned int multiplier);
-void __init time_init(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
 
 struct s390_mmap_arg_struct;
 struct fadvise64_64_args;
@@ -73,6 +71,5 @@ extern struct exception_table_entry _stop_amode31_ex_table[];
 #define __amode31_data __section(".amode31.data")
 #define __amode31_ref __section(".amode31.refs")
 extern long _start_amode31_refs[], _end_amode31_refs[];
-extern unsigned long __amode31_base;
 
 #endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
new file mode 100644
index 000000000000..f02127219a27
--- /dev/null
+++ b/arch/s390/kernel/facility.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2023
+ */
+
+#include <asm/facility.h>
+
+unsigned int stfle_size(void)
+{
+	static unsigned int size;
+	unsigned int r;
+	u64 dummy;
+
+	r = READ_ONCE(size);
+	if (!r) {
+		r = __stfle_asm(&dummy, 1) + 1;
+		WRITE_ONCE(size, r);
+	}
+	return r;
+}
+EXPORT_SYMBOL(stfle_size);
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index d864c9a325e2..6f2e87920288 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -8,258 +8,186 @@
 #include <linux/kernel.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
-#include <asm/fpu/types.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
 
-asm(".include \"asm/vx-insn.h\"\n");
-
-void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
 {
+	__vector128 *vxrs = state->vxrs;
+	int mask;
+
 	/*
 	 * Limit the save to the FPU/vector registers already
-	 * in use by the previous context
+	 * in use by the previous context.
 	 */
-	flags &= state->mask;
-
+	flags &= state->hdr.mask;
 	if (flags & KERNEL_FPC)
-		/* Save floating point control */
-		asm volatile("stfpc %0" : "=Q" (state->fpc));
-
-	if (!MACHINE_HAS_VX) {
-		if (flags & KERNEL_VXR_V0V7) {
-			/* Save floating-point registers */
-			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
-			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
-			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
-			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
-			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
-			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
-			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
-			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
-			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
-			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
-			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
-			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
-			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
-			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
-			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
-			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
-		}
+		fpu_stfpc(&state->hdr.fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_LOW)
+			save_fp_regs_vx(vxrs);
 		return;
 	}
-
-	/* Test and save vector registers */
-	asm volatile (
-		/*
-		 * Test if any vector register must be saved and, if so,
-		 * test if all register can be saved.
-		 */
-		"	la	1,%[vxrs]\n"	/* load save area */
-		"	tmll	%[m],30\n"	/* KERNEL_VXR */
-		"	jz	7f\n"		/* no work -> done */
-		"	jo	5f\n"		/* -> save V0..V31 */
-		/*
-		 * Test for special case KERNEL_FPU_MID only. In this
-		 * case a vstm V8..V23 is the best instruction
-		 */
-		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
-		"	jne	0f\n"		/* -> save V8..V23 */
-		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
-		"	j	7f\n"
-		/* Test and save the first half of 16 vector registers */
-		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
-		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> save V0..V15 */
-		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
-		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
-		"	j	3f\n"
-		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
-		"	j	3f\n"
-		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
-		/* Test and save the second half of 16 vector registers */
-		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
-		"	jz	7f\n"
-		"	jo	6f\n"		/* 11 -> save V16..V31 */
-		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
-		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
-		"	j	7f\n"
-		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
-		"	j	7f\n"
-		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
-		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
-		"7:"
-		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (flags)
-		: "1", "cc");
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		vxrs += fpu_vstm(0, 15, vxrs);
+		vxrs += fpu_vstm(16, 31, vxrs);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		vxrs += fpu_vstm(8, 23, vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			vxrs += fpu_vstm(0, 15, vxrs);
+		else if (mask == KERNEL_VXR_V0V7)
+			vxrs += fpu_vstm(0, 7, vxrs);
+		else
+			vxrs += fpu_vstm(8, 15, vxrs);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			vxrs += fpu_vstm(16, 31, vxrs);
+		else if (mask == KERNEL_VXR_V16V23)
+			vxrs += fpu_vstm(16, 23, vxrs);
+		else
+			vxrs += fpu_vstm(24, 31, vxrs);
+	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+void __kernel_fpu_end(struct kernel_fpu *state, int flags)
 {
+	__vector128 *vxrs = state->vxrs;
+	int mask;
+
 	/*
 	 * Limit the restore to the FPU/vector registers of the
-	 * previous context that have been overwritte by the
-	 * current context
+	 * previous context that have been overwritten by the
+	 * current context.
 	 */
-	flags &= state->mask;
-
+	flags &= state->hdr.mask;
 	if (flags & KERNEL_FPC)
-		/* Restore floating-point controls */
-		asm volatile("lfpc %0" : : "Q" (state->fpc));
-
-	if (!MACHINE_HAS_VX) {
-		if (flags & KERNEL_VXR_V0V7) {
-			/* Restore floating-point registers */
-			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
-			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
-			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
-			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
-			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
-			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
-			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
-			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
-			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
-			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
-			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
-			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
-			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
-			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
-			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
-			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
-		}
+		fpu_lfpc(&state->hdr.fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_LOW)
+			load_fp_regs_vx(vxrs);
 		return;
 	}
-
-	/* Test and restore (load) vector registers */
-	asm volatile (
-		/*
-		 * Test if any vector register must be loaded and, if so,
-		 * test if all registers can be loaded at once.
-		 */
-		"	la	1,%[vxrs]\n"	/* load restore area */
-		"	tmll	%[m],30\n"	/* KERNEL_VXR */
-		"	jz	7f\n"		/* no work -> done */
-		"	jo	5f\n"		/* -> restore V0..V31 */
-		/*
-		 * Test for special case KERNEL_FPU_MID only. In this
-		 * case a vlm V8..V23 is the best instruction
-		 */
-		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
-		"	jne	0f\n"		/* -> restore V8..V23 */
-		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
-		"	j	7f\n"
-		/* Test and restore the first half of 16 vector registers */
-		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
-		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> restore V0..V15 */
-		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
-		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
-		"	j	3f\n"
-		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
-		"	j	3f\n"
-		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
-		/* Test and restore the second half of 16 vector registers */
-		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
-		"	jz	7f\n"
-		"	jo	6f\n"		/* 11 -> restore V16..V31 */
-		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
-		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
-		"	j	7f\n"
-		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
-		"	j	7f\n"
-		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
-		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
-		"7:"
-		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (flags)
-		: "1", "cc");
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		vxrs += fpu_vlm(0, 15, vxrs);
+		vxrs += fpu_vlm(16, 31, vxrs);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		vxrs += fpu_vlm(8, 23, vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			vxrs += fpu_vlm(0, 15, vxrs);
+		else if (mask == KERNEL_VXR_V0V7)
+			vxrs += fpu_vlm(0, 7, vxrs);
+		else
+			vxrs += fpu_vlm(8, 15, vxrs);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			vxrs += fpu_vlm(16, 31, vxrs);
+		else if (mask == KERNEL_VXR_V16V23)
+			vxrs += fpu_vlm(16, 23, vxrs);
+		else
+			vxrs += fpu_vlm(24, 31, vxrs);
+	}
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
 
-void __load_fpu_regs(void)
+void load_fpu_state(struct fpu *state, int flags)
 {
-	struct fpu *state = &current->thread.fpu;
-	unsigned long *regs = current->thread.fpu.regs;
+	__vector128 *vxrs = &state->vxrs[0];
+	int mask;
 
-	asm volatile("lfpc %0" : : "Q" (state->fpc));
-	if (likely(MACHINE_HAS_VX)) {
-		asm volatile("lgr	1,%0\n"
-			     "VLM	0,15,0,1\n"
-			     "VLM	16,31,256,1\n"
-			     :
-			     : "d" (regs)
-			     : "1", "cc", "memory");
-	} else {
-		asm volatile("ld 0,%0" : : "Q" (regs[0]));
-		asm volatile("ld 1,%0" : : "Q" (regs[1]));
-		asm volatile("ld 2,%0" : : "Q" (regs[2]));
-		asm volatile("ld 3,%0" : : "Q" (regs[3]));
-		asm volatile("ld 4,%0" : : "Q" (regs[4]));
-		asm volatile("ld 5,%0" : : "Q" (regs[5]));
-		asm volatile("ld 6,%0" : : "Q" (regs[6]));
-		asm volatile("ld 7,%0" : : "Q" (regs[7]));
-		asm volatile("ld 8,%0" : : "Q" (regs[8]));
-		asm volatile("ld 9,%0" : : "Q" (regs[9]));
-		asm volatile("ld 10,%0" : : "Q" (regs[10]));
-		asm volatile("ld 11,%0" : : "Q" (regs[11]));
-		asm volatile("ld 12,%0" : : "Q" (regs[12]));
-		asm volatile("ld 13,%0" : : "Q" (regs[13]));
-		asm volatile("ld 14,%0" : : "Q" (regs[14]));
-		asm volatile("ld 15,%0" : : "Q" (regs[15]));
+	if (flags & KERNEL_FPC)
+		fpu_lfpc_safe(&state->fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_V0V7)
+			load_fp_regs_vx(state->vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		fpu_vlm(0, 15, &vxrs[0]);
+		fpu_vlm(16, 31, &vxrs[16]);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		fpu_vlm(8, 23, &vxrs[8]);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			fpu_vlm(0, 15, &vxrs[0]);
+		else if (mask == KERNEL_VXR_V0V7)
+			fpu_vlm(0, 7, &vxrs[0]);
+		else
+			fpu_vlm(8, 15, &vxrs[8]);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			fpu_vlm(16, 31, &vxrs[16]);
+		else if (mask == KERNEL_VXR_V16V23)
+			fpu_vlm(16, 23, &vxrs[16]);
+		else
+			fpu_vlm(24, 31, &vxrs[24]);
 	}
-	clear_cpu_flag(CIF_FPU);
-}
-EXPORT_SYMBOL(__load_fpu_regs);
-
-void load_fpu_regs(void)
-{
-	raw_local_irq_disable();
-	__load_fpu_regs();
-	raw_local_irq_enable();
 }
-EXPORT_SYMBOL(load_fpu_regs);
 
-void save_fpu_regs(void)
+void save_fpu_state(struct fpu *state, int flags)
 {
-	unsigned long flags, *regs;
-	struct fpu *state;
-
-	local_irq_save(flags);
+	__vector128 *vxrs = &state->vxrs[0];
+	int mask;
 
-	if (test_cpu_flag(CIF_FPU))
-		goto out;
-
-	state = &current->thread.fpu;
-	regs = current->thread.fpu.regs;
-
-	asm volatile("stfpc %0" : "=Q" (state->fpc));
-	if (likely(MACHINE_HAS_VX)) {
-		asm volatile("lgr	1,%0\n"
-			     "VSTM	0,15,0,1\n"
-			     "VSTM	16,31,256,1\n"
-			     :
-			     : "d" (regs)
-			     : "1", "cc", "memory");
-	} else {
-		asm volatile("std 0,%0" : "=Q" (regs[0]));
-		asm volatile("std 1,%0" : "=Q" (regs[1]));
-		asm volatile("std 2,%0" : "=Q" (regs[2]));
-		asm volatile("std 3,%0" : "=Q" (regs[3]));
-		asm volatile("std 4,%0" : "=Q" (regs[4]));
-		asm volatile("std 5,%0" : "=Q" (regs[5]));
-		asm volatile("std 6,%0" : "=Q" (regs[6]));
-		asm volatile("std 7,%0" : "=Q" (regs[7]));
-		asm volatile("std 8,%0" : "=Q" (regs[8]));
-		asm volatile("std 9,%0" : "=Q" (regs[9]));
-		asm volatile("std 10,%0" : "=Q" (regs[10]));
-		asm volatile("std 11,%0" : "=Q" (regs[11]));
-		asm volatile("std 12,%0" : "=Q" (regs[12]));
-		asm volatile("std 13,%0" : "=Q" (regs[13]));
-		asm volatile("std 14,%0" : "=Q" (regs[14]));
-		asm volatile("std 15,%0" : "=Q" (regs[15]));
+	if (flags & KERNEL_FPC)
+		fpu_stfpc(&state->fpc);
+	if (!cpu_has_vx()) {
+		if (flags & KERNEL_VXR_LOW)
+			save_fp_regs_vx(state->vxrs);
+		return;
+	}
+	mask = flags & KERNEL_VXR;
+	if (mask == KERNEL_VXR) {
+		fpu_vstm(0, 15, &vxrs[0]);
+		fpu_vstm(16, 31, &vxrs[16]);
+		return;
+	}
+	if (mask == KERNEL_VXR_MID) {
+		fpu_vstm(8, 23, &vxrs[8]);
+		return;
+	}
+	mask = flags & KERNEL_VXR_LOW;
+	if (mask) {
+		if (mask == KERNEL_VXR_LOW)
+			fpu_vstm(0, 15, &vxrs[0]);
+		else if (mask == KERNEL_VXR_V0V7)
+			fpu_vstm(0, 7, &vxrs[0]);
+		else
+			fpu_vstm(8, 15, &vxrs[8]);
+	}
+	mask = flags & KERNEL_VXR_HIGH;
+	if (mask) {
+		if (mask == KERNEL_VXR_HIGH)
+			fpu_vstm(16, 31, &vxrs[16]);
+		else if (mask == KERNEL_VXR_V16V23)
+			fpu_vstm(16, 23, &vxrs[16]);
+		else
+			fpu_vstm(24, 31, &vxrs[24]);
 	}
-	set_cpu_flag(CIF_FPU);
-out:
-	local_irq_restore(flags);
 }
-EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(save_fpu_state);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 416b5a94353d..e94bb98f5231 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,15 @@
  *   Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#include <linux/moduleloader.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <linux/kmsan-checks.h>
+#include <linux/cpufeature.h>
 #include <linux/kprobes.h>
+#include <linux/execmem.h>
 #include <trace/syscall.h>
 #include <asm/asm-offsets.h>
 #include <asm/text-patching.h>
@@ -49,30 +51,6 @@ struct ftrace_insn {
 	s32 disp;
 } __packed;
 
-asm(
-	"	.align 16\n"
-	"ftrace_shared_hotpatch_trampoline_br:\n"
-	"	lmg	%r0,%r1,2(%r1)\n"
-	"	br	%r1\n"
-	"ftrace_shared_hotpatch_trampoline_br_end:\n"
-);
-
-#ifdef CONFIG_EXPOLINE
-asm(
-	"	.align 16\n"
-	"ftrace_shared_hotpatch_trampoline_exrl:\n"
-	"	lmg	%r0,%r1,2(%r1)\n"
-	"	exrl	%r0,0f\n"
-	"	j	.\n"
-	"0:	br	%r1\n"
-	"ftrace_shared_hotpatch_trampoline_exrl_end:\n"
-);
-#endif /* CONFIG_EXPOLINE */
-
-#ifdef CONFIG_MODULES
-static char *ftrace_plt;
-#endif /* CONFIG_MODULES */
-
 static const char *ftrace_shared_hotpatch_trampoline(const char **end)
 {
 	const char *tstart, *tend;
@@ -92,19 +70,20 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
 
 bool ftrace_need_init_nop(void)
 {
-	return true;
+	return !cpu_has_seq_insn();
 }
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 {
 	static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
 		__ftrace_hotpatch_trampolines_start;
-	static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+	static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
 	static struct ftrace_hotpatch_trampoline *trampoline;
 	struct ftrace_hotpatch_trampoline **next_trampoline;
 	struct ftrace_hotpatch_trampoline *trampolines_end;
 	struct ftrace_hotpatch_trampoline tmp;
 	struct ftrace_insn *insn;
+	struct ftrace_insn old;
 	const char *shared;
 	s32 disp;
 
@@ -118,7 +97,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 	if (mod) {
 		next_trampoline = &mod->arch.next_trampoline;
 		trampolines_end = mod->arch.trampolines_end;
-		shared = ftrace_plt;
 	}
 #endif
 
@@ -126,8 +104,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
 		return -ENOMEM;
 	trampoline = (*next_trampoline)++;
 
+	if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+		return -EFAULT;
 	/* Check for the compiler-generated fentry nop (brcl 0, .). */
-	if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+	if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
 		return -EINVAL;
 
 	/* Generate the trampoline. */
@@ -163,8 +143,35 @@ static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrac
 	return trampoline;
 }
 
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
-		       unsigned long addr)
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+	/* brasl r0,target or brcl 0,0 */
+	return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+				     .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+				    unsigned long target)
+{
+	struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+	struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+	struct ftrace_insn old;
+
+	if (!IS_ALIGNED(ip, 8))
+		return -EINVAL;
+	if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+		return -EFAULT;
+	/* Verify that the code to be replaced matches what we expect. */
+	if (memcmp(&orig, &old, sizeof(old)))
+		return -EINVAL;
+	s390_kernel_write((void *)ip, &new, sizeof(new));
+	return 0;
+}
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+					 unsigned long old_addr,
+					 unsigned long addr)
 {
 	struct ftrace_hotpatch_trampoline *trampoline;
 	u64 old;
@@ -180,6 +187,15 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 	return 0;
 }
 
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	if (cpu_has_seq_insn())
+		return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+	else
+		return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
 static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
 {
 	u16 old;
@@ -198,11 +214,14 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 		    unsigned long addr)
 {
-	/* Expect brcl 0xf,... */
-	return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+	/* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */
+	if (cpu_has_seq_insn())
+		return ftrace_patch_branch_insn(rec->ip, addr, 0);
+	else
+		return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
 }
 
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	struct ftrace_hotpatch_trampoline *trampoline;
 
@@ -214,6 +233,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
 }
 
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	if (cpu_has_seq_insn())
+		return ftrace_patch_branch_insn(rec->ip, 0, addr);
+	else
+		return ftrace_make_trampoline_call(rec, addr);
+}
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	ftrace_func = func;
@@ -234,75 +261,20 @@ void ftrace_arch_code_modify_post_process(void)
 	text_poke_sync_lock();
 }
 
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static int __init ftrace_plt_init(void)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	const char *start, *end;
-
-	ftrace_plt = module_alloc(PAGE_SIZE);
-	if (!ftrace_plt)
-		panic("cannot allocate ftrace plt\n");
-
-	start = ftrace_shared_hotpatch_trampoline(&end);
-	memcpy(ftrace_plt, start, end - start);
-	set_memory_ro((unsigned long)ftrace_plt, 1);
-	return 0;
-}
-device_initcall(ftrace_plt_init);
-
-#endif /* CONFIG_MODULES */
+	unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
+	unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15];
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-/*
- * Hook the return address and push it in the stack of return addresses
- * in current thread info.
- */
-unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
-				    unsigned long ip)
-{
 	if (unlikely(ftrace_graph_is_dead()))
-		goto out;
+		return;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
-		goto out;
-	ip -= MCOUNT_INSN_SIZE;
-	if (!function_graph_enter(ra, ip, 0, (void *) sp))
-		ra = (unsigned long) return_to_handler;
-out:
-	return ra;
-}
-NOKPROBE_SYMBOL(prepare_ftrace_return);
-
-/*
- * Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative on condition. To enable the ftrace graph code
- * block, we simply patch the mask field of the instruction to zero and
- * turn the instruction into a nop.
- * To disable the ftrace graph code the mask field will be patched to
- * all ones, which turns the instruction into an unconditional branch.
- */
-int ftrace_enable_ftrace_graph_caller(void)
-{
-	int rc;
-
-	/* Expect brc 0xf,... */
-	rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
-	if (rc)
-		return rc;
-	text_poke_sync_lock();
-	return 0;
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
-	int rc;
-
-	/* Expect brc 0x0,... */
-	rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
-	if (rc)
-		return rc;
-	text_poke_sync_lock();
-	return 0;
+		return;
+	if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs))
+		*parent = (unsigned long)&return_to_handler;
 }
 
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -316,10 +288,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	struct kprobe *p;
 	int bit;
 
+	if (unlikely(kprobe_ftrace_disabled))
+		return;
+
 	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (bit < 0)
 		return;
 
+	kmsan_unpoison_memory(fregs, ftrace_regs_size());
 	regs = ftrace_get_regs(fregs);
 	p = get_kprobe((kprobe_opcode_t *)ip);
 	if (!regs || unlikely(!p) || kprobe_disabled(p))
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 7f75a9616406..23337065f402 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -18,7 +18,5 @@ extern const char ftrace_shared_hotpatch_trampoline_br[];
 extern const char ftrace_shared_hotpatch_trampoline_br_end[];
 extern const char ftrace_shared_hotpatch_trampoline_exrl[];
 extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
-extern const char ftrace_plt_template[];
-extern const char ftrace_plt_template_end[];
 
 #endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
index d14dd1c2e524..cf26d7a37425 100644
--- a/arch/s390/kernel/guarded_storage.c
+++ b/arch/s390/kernel/guarded_storage.c
@@ -4,6 +4,7 @@
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
+#include <linux/cpufeature.h>
 #include <linux/kernel.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
@@ -28,7 +29,7 @@ static int gs_enable(void)
 			return -ENOMEM;
 		gs_cb->gsd = 25;
 		preempt_disable();
-		__ctl_set_bit(2, 4);
+		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
 		load_gs_cb(gs_cb);
 		current->thread.gs_cb = gs_cb;
 		preempt_enable();
@@ -42,7 +43,7 @@ static int gs_disable(void)
 		preempt_disable();
 		kfree(current->thread.gs_cb);
 		current->thread.gs_cb = NULL;
-		__ctl_clear_bit(2, 4);
+		local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
 		preempt_enable();
 	}
 	return 0;
@@ -84,7 +85,7 @@ void gs_load_bc_cb(struct pt_regs *regs)
 	if (gs_cb) {
 		kfree(current->thread.gs_cb);
 		current->thread.gs_bc_cb = NULL;
-		__ctl_set_bit(2, 4);
+		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
 		load_gs_cb(gs_cb);
 		current->thread.gs_cb = gs_cb;
 	}
@@ -109,7 +110,7 @@ static int gs_broadcast(void)
 SYSCALL_DEFINE2(s390_guarded_storage, int, command,
 		struct gs_cb __user *, gs_cb)
 {
-	if (!MACHINE_HAS_GS)
+	if (!cpu_has_gs())
 		return -EOPNOTSUPP;
 	switch (command) {
 	case GS_ENABLE:
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index d7b8b6ad574d..7edb9ded199c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -10,32 +10,31 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <asm/lowcore.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 
 __HEAD
-ENTRY(startup_continue)
-	larl	%r1,tod_clock_base
-	mvc	0(16,%r1),__LC_BOOT_CLOCK
+SYM_CODE_START(startup_continue)
 #
 # Setup stack
 #
+	GET_LC	%r2
 	larl	%r14,init_task
-	stg	%r14,__LC_CURRENT
-	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
-#ifdef CONFIG_KASAN
-	brasl	%r14,kasan_early_init
-#endif
+	stg	%r14,__LC_CURRENT(%r2)
+	larl	%r15,init_thread_union+STACK_INIT_OFFSET
+	stg	%r15,__LC_KERNEL_STACK(%r2)
+	brasl	%r14,sclp_early_adjust_va	# allow sclp_early_printk
 	brasl	%r14,startup_init		# s390 specific early init
 	brasl	%r14,start_kernel		# common init code
 #
 # We returned from start_kernel ?!? PANIK
 #
 	basr	%r13,0
-	lpswe	.Ldw-.(%r13)		# load disabled wait psw
+	lpswe	dw_psw-.(%r13)		# load disabled wait psw
+SYM_CODE_END(startup_continue)
 
-	.align	16
-.LPG1:
-.Ldw:	.quad	0x0002000180000000,0x0000000000000000
+	.balign	16
+SYM_DATA_LOCAL(dw_psw,	.quad 0x0002000180000000,0x0000000000000000)
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..e7b66d046e8d
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "hd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There are two places where CPU capacities are being accessed within
+ * hiperdispatch.
+ * -> hiperdispatch's recurring work function reads CPU capacities to
+ *    determine high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ *    updates CPU capacities.
+ * These two can run on different CPUs in parallel which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * HD_STEAL_THRESHOLD value is taken as reference. Whenever steal time
+ * crosses the threshold value hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to the SMP thread
+ * performance. Comparing the throughput of:
+ * - single CORE, with N threads, running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yields better
+ * performance. This performance difference is roughly 30% (can change
+ * between machine generations).
+ *
+ * Hiperdispatch tries to hint the scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs is less than 30%,
+ * therefore delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR			(4)
+#define HD_DELAY_INTERVAL		(HZ / 4)
+#define HD_STEAL_THRESHOLD		30
+#define HD_STEAL_AVG_WEIGHT		16
+
+static cpumask_t hd_vl_coremask;	/* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask;	/* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores;	/* Current CORE count with high capacity */
+static int hd_entitled_cores;		/* Total vertical high and medium CORE count */
+static int hd_online_cores;		/* Current online CORE count */
+
+static unsigned long hd_previous_steal;	/* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time;	/* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time;	/* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments;	/* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+	if (!cpu_has_topology())
+		enable = 0;
+	if (hd_enabled == enable)
+		return 0;
+	hd_enabled = enable;
+	return 1;
+}
+
+void hd_reset_state(void)
+{
+	cpumask_clear(&hd_vl_coremask);
+	cpumask_clear(&hd_vmvl_cpumask);
+	hd_entitled_cores = 0;
+	hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+	const struct cpumask *siblings;
+	int polarization;
+
+	hd_online_cores++;
+	polarization = smp_cpu_get_polarization(cpu);
+	siblings = topology_sibling_cpumask(cpu);
+	switch (polarization) {
+	case POLARIZATION_VH:
+		hd_entitled_cores++;
+		break;
+	case POLARIZATION_VM:
+		hd_entitled_cores++;
+		cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+		break;
+	case POLARIZATION_VL:
+		cpumask_set_cpu(cpu, &hd_vl_coremask);
+		cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+		break;
+	}
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+	static ktime_t prev;
+	ktime_t now;
+
+	/*
+	 * Check if hiperdispatch is active, if not set the prev to 0.
+	 * This way it is possible to differentiate the first update iteration after
+	 * enabling hiperdispatch.
+	 */
+	if (hd_entitled_cores == 0 || hd_enabled == 0) {
+		prev = ktime_set(0, 0);
+		return;
+	}
+	now = ktime_get();
+	if (ktime_after(prev, 0)) {
+		if (hd_high_capacity_cores == hd_online_cores)
+			hd_high_time += ktime_ms_delta(now, prev);
+		else
+			hd_low_time += ktime_ms_delta(now, prev);
+	}
+	prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+	int cpu, upscaling_cores;
+	unsigned long capacity;
+
+	upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+	capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+	hd_high_capacity_cores = hd_entitled_cores;
+	for_each_cpu(cpu, &hd_vl_coremask) {
+		smp_set_core_capacity(cpu, capacity);
+		if (capacity != CPU_CAPACITY_HIGH)
+			continue;
+		hd_high_capacity_cores++;
+		upscaling_cores--;
+		if (upscaling_cores == 0)
+			capacity = CPU_CAPACITY_LOW;
+	}
+}
+
+void hd_disable_hiperdispatch(void)
+{
+	cancel_delayed_work_sync(&hd_capacity_work);
+	hd_high_capacity_cores = hd_online_cores;
+	hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+	mutex_lock(&hd_counter_mutex);
+	hd_update_times();
+	mutex_unlock(&hd_counter_mutex);
+	if (hd_enabled == 0)
+		return 0;
+	if (hd_entitled_cores == 0)
+		return 0;
+	if (hd_online_cores <= hd_entitled_cores)
+		return 0;
+	mod_delayed_work(system_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+	hd_update_capacities();
+	return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+	static unsigned long steal;
+
+	steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+	return steal;
+}
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+	unsigned long time_delta, steal_delta, steal, percentage;
+	static ktime_t prev;
+	int cpus, cpu;
+	ktime_t now;
+
+	cpus = 0;
+	steal = 0;
+	percentage = 0;
+	for_each_cpu(cpu, &hd_vmvl_cpumask) {
+		steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+		cpus++;
+	}
+	/*
+	 * If there are no vertical medium and low CPUs, steal time
+	 * is 0 as vertical high CPUs shouldn't experience steal time.
+	 */
+	if (cpus == 0)
+		return percentage;
+	now = ktime_get();
+	time_delta = ktime_to_ns(ktime_sub(now, prev));
+	if (steal > hd_previous_steal && hd_previous_steal != 0) {
+		steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+		percentage = steal_delta / cpus;
+	}
+	hd_previous_steal = steal;
+	prev = now;
+	return percentage;
+}
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+	unsigned long steal_percentage, new_cores;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	/*
+	 * If online cores are less than or equal to entitled cores,
+	 * hiperdispatch does not need to make any adjustments; schedule a
+	 * topology update to disable hiperdispatch.
+	 * Normally this check is handled on topology update, but during cpu
+	 * unhotplug, topology and cpu mask updates are done in reverse
+	 * order, causing hd_enable_hiperdispatch() to get stale data.
+	 */
+	if (hd_online_cores <= hd_entitled_cores) {
+		topology_schedule_update();
+		mutex_unlock(&smp_cpu_state_mutex);
+		return;
+	}
+	steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+	if (steal_percentage < hd_steal_threshold)
+		new_cores = hd_online_cores;
+	else
+		new_cores = hd_entitled_cores;
+	if (hd_high_capacity_cores != new_cores) {
+		trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+		hd_high_capacity_cores = new_cores;
+		atomic64_inc(&hd_adjustments);
+		topology_schedule_update();
+	}
+	trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+	mutex_unlock(&smp_cpu_state_mutex);
+	schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+				     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int hiperdispatch;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &hiperdispatch,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	hiperdispatch = hd_enabled;
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+	mutex_lock(&smp_cpu_state_mutex);
+	if (hd_set_hiperdispatch_mode(hiperdispatch))
+		topology_schedule_update();
+	mutex_unlock(&smp_cpu_state_mutex);
+	return 0;
+}
+
+static const struct ctl_table hiperdispatch_ctl_table[] = {
+	{
+		.procname	= "hiperdispatch",
+		.mode		= 0644,
+		.proc_handler	= hiperdispatch_ctl_handler,
+	},
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf,
+					size_t count)
+{
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc)
+		return rc;
+	if (val > 100)
+		return -ERANGE;
+	hd_steal_threshold = val;
+	return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf,
+				     size_t count)
+{
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc)
+		return rc;
+	if (!val)
+		return -ERANGE;
+	hd_delay_factor = val;
+	return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+	&dev_attr_hd_steal_threshold.attr,
+	&dev_attr_hd_delay_factor.attr,
+	NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+	.name  = "hiperdispatch",
+	.attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+	mutex_lock(&hd_counter_mutex);
+	hd_update_times();
+	*val = hd_high_time;
+	mutex_unlock(&hd_counter_mutex);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+	mutex_lock(&hd_counter_mutex);
+	hd_update_times();
+	*val = hd_low_time;
+	mutex_unlock(&hd_counter_mutex);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+	*val = atomic64_read(&hd_adjustments);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+	struct dentry *dir;
+
+	dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+	debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+	debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+	debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+	struct device *dev;
+
+	dev = bus_get_dev_root(&cpu_subsys);
+	if (!dev)
+		return;
+	if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+		pr_warn("Unable to create hiperdispatch attribute group\n");
+	put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+	if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+		hd_set_hiperdispatch_mode(1);
+		topology_schedule_update();
+	}
+	if (!register_sysctl("s390", hiperdispatch_ctl_table))
+		pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+	hd_create_debugfs_counters();
+	hd_create_attributes();
+	return 0;
+}
+late_initcall(hd_init);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 4bf1ee293f2b..39cb8d0ae348 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -12,9 +12,9 @@
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
-#include <linux/sched/cputime.h>
 #include <trace/events/power.h>
 #include <asm/cpu_mf.h>
+#include <asm/cputime.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include "entry.h"
@@ -24,117 +24,66 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 void account_idle_time_irq(void)
 {
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+	struct lowcore *lc = get_lowcore();
+	unsigned long idle_time;
 	u64 cycles_new[8];
 	int i;
 
-	clear_cpu_flag(CIF_ENABLED_WAIT);
 	if (smp_cpu_mtid) {
 		stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
 		for (i = 0; i < smp_cpu_mtid; i++)
 			this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
 	}
 
-	idle->clock_idle_exit = S390_lowcore.int_clock;
-	idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
+	idle_time = lc->int_clock - idle->clock_idle_enter;
+
+	lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+	lc->last_update_clock = lc->int_clock;
 
-	S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
-	S390_lowcore.last_update_clock = idle->clock_idle_exit;
+	lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+	lc->last_update_timer = lc->sys_enter_timer;
 
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
-	S390_lowcore.last_update_timer = idle->timer_idle_exit;
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+	account_idle_time(cputime_to_nsecs(idle_time));
 }
 
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
 {
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
-	unsigned long idle_time;
 	unsigned long psw_mask;
 
 	/* Wait for external, I/O or machine check interrupt. */
-	psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
-		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+	psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+		   PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	clear_cpu_flag(CIF_NOHZ_DELAY);
-
-	/* psw_idle() returns with interrupts disabled. */
-	psw_idle(idle, psw_mask);
-
-	/* Account time spent with enabled wait psw loaded as idle time. */
-	raw_write_seqcount_begin(&idle->seqcount);
-	idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
-	idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
-	idle->idle_time += idle_time;
-	idle->idle_count++;
-	account_idle_time(cputime_to_nsecs(idle_time));
-	raw_write_seqcount_end(&idle->seqcount);
-	raw_local_irq_enable();
+	set_cpu_flag(CIF_ENABLED_WAIT);
+	if (smp_cpu_mtid)
+		stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter);
+	idle->clock_idle_enter = get_tod_clock_fast();
+	idle->timer_idle_enter = get_cpu_timer();
+	bpon();
+	__load_psw_mask(psw_mask);
 }
 
 static ssize_t show_idle_count(struct device *dev,
-				struct device_attribute *attr, char *buf)
+			       struct device_attribute *attr, char *buf)
 {
 	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
-	unsigned long idle_count;
-	unsigned int seq;
-
-	do {
-		seq = read_seqcount_begin(&idle->seqcount);
-		idle_count = READ_ONCE(idle->idle_count);
-		if (READ_ONCE(idle->clock_idle_enter))
-			idle_count++;
-	} while (read_seqcount_retry(&idle->seqcount, seq));
-	return sprintf(buf, "%lu\n", idle_count);
+
+	return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
 }
 DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
 
 static ssize_t show_idle_time(struct device *dev,
-				struct device_attribute *attr, char *buf)
+			      struct device_attribute *attr, char *buf)
 {
-	unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
 	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
-	unsigned int seq;
-
-	do {
-		seq = read_seqcount_begin(&idle->seqcount);
-		idle_time = READ_ONCE(idle->idle_time);
-		idle_enter = READ_ONCE(idle->clock_idle_enter);
-		idle_exit = READ_ONCE(idle->clock_idle_exit);
-	} while (read_seqcount_retry(&idle->seqcount, seq));
-	in_idle = 0;
-	now = get_tod_clock();
-	if (idle_enter) {
-		if (idle_exit) {
-			in_idle = idle_exit - idle_enter;
-		} else if (now > idle_enter) {
-			in_idle = now - idle_enter;
-		}
-	}
-	idle_time += in_idle;
-	return sprintf(buf, "%lu\n", idle_time >> 12);
-}
-DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
-u64 arch_cpu_idle_time(int cpu)
-{
-	struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
-	unsigned long now, idle_enter, idle_exit, in_idle;
-	unsigned int seq;
-
-	do {
-		seq = read_seqcount_begin(&idle->seqcount);
-		idle_enter = READ_ONCE(idle->clock_idle_enter);
-		idle_exit = READ_ONCE(idle->clock_idle_exit);
-	} while (read_seqcount_retry(&idle->seqcount, seq));
-	in_idle = 0;
-	now = get_tod_clock();
-	if (idle_enter) {
-		if (idle_exit) {
-			in_idle = idle_exit - idle_enter;
-		} else if (now > idle_enter) {
-			in_idle = now - idle_enter;
-		}
-	}
-	return cputime_to_nsecs(in_idle);
+	return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
 }
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
 void arch_cpu_idle_enter(void)
 {
@@ -144,7 +93,7 @@ void arch_cpu_idle_exit(void)
 {
 }
 
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
 {
 	cpu_die();
 }
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1cc85b8ff42e..ff15f91affde 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/kstrtox.h>
 #include <linux/panic_notifier.h>
 #include <linux/reboot.h>
 #include <linux/ctype.h>
@@ -19,7 +20,9 @@
 #include <linux/gfp.h>
 #include <linux/crash_dump.h>
 #include <linux/debug_locks.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-extable.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
@@ -29,6 +32,7 @@
 #include <asm/sclp.h>
 #include <asm/checksum.h>
 #include <asm/debug.h>
+#include <asm/abs_lowcore.h>
 #include <asm/os_info.h>
 #include <asm/sections.h>
 #include <asm/boot_data.h>
@@ -38,6 +42,8 @@
 
 #define IPL_UNKNOWN_STR		"unknown"
 #define IPL_CCW_STR		"ccw"
+#define IPL_ECKD_STR		"eckd"
+#define IPL_ECKD_DUMP_STR	"eckd_dump"
 #define IPL_FCP_STR		"fcp"
 #define IPL_FCP_DUMP_STR	"fcp_dump"
 #define IPL_NVME_STR		"nvme"
@@ -45,6 +51,7 @@
 #define IPL_NSS_STR		"nss"
 
 #define DUMP_CCW_STR		"ccw"
+#define DUMP_ECKD_STR		"eckd"
 #define DUMP_FCP_STR		"fcp"
 #define DUMP_NVME_STR		"nvme"
 #define DUMP_NONE_STR		"none"
@@ -91,6 +98,10 @@ static char *ipl_type_str(enum ipl_type type)
 	switch (type) {
 	case IPL_TYPE_CCW:
 		return IPL_CCW_STR;
+	case IPL_TYPE_ECKD:
+		return IPL_ECKD_STR;
+	case IPL_TYPE_ECKD_DUMP:
+		return IPL_ECKD_DUMP_STR;
 	case IPL_TYPE_FCP:
 		return IPL_FCP_STR;
 	case IPL_TYPE_FCP_DUMP:
@@ -112,6 +123,7 @@ enum dump_type {
 	DUMP_TYPE_CCW	= 2,
 	DUMP_TYPE_FCP	= 4,
 	DUMP_TYPE_NVME	= 8,
+	DUMP_TYPE_ECKD	= 16,
 };
 
 static char *dump_type_str(enum dump_type type)
@@ -121,6 +133,8 @@ static char *dump_type_str(enum dump_type type)
 		return DUMP_NONE_STR;
 	case DUMP_TYPE_CCW:
 		return DUMP_CCW_STR;
+	case DUMP_TYPE_ECKD:
+		return DUMP_ECKD_STR;
 	case DUMP_TYPE_FCP:
 		return DUMP_FCP_STR;
 	case DUMP_TYPE_NVME:
@@ -146,6 +160,7 @@ static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
 static struct ipl_parameter_block *reipl_block_fcp;
 static struct ipl_parameter_block *reipl_block_nvme;
 static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_eckd;
 static struct ipl_parameter_block *reipl_block_nss;
 static struct ipl_parameter_block *reipl_block_actual;
 
@@ -154,20 +169,24 @@ static enum dump_type dump_type = DUMP_TYPE_NONE;
 static struct ipl_parameter_block *dump_block_fcp;
 static struct ipl_parameter_block *dump_block_nvme;
 static struct ipl_parameter_block *dump_block_ccw;
+static struct ipl_parameter_block *dump_block_eckd;
 
 static struct sclp_ipl_info sclp_ipl_info;
 
 static bool reipl_nvme_clear;
 static bool reipl_fcp_clear;
 static bool reipl_ccw_clear;
+static bool reipl_eckd_clear;
 
-static inline int __diag308(unsigned long subcode, void *addr)
+static unsigned long os_info_flags;
+
+static inline int __diag308(unsigned long subcode, unsigned long addr)
 {
 	union register_pair r1;
 
-	r1.even = (unsigned long) addr;
+	r1.even = addr;
 	r1.odd	= 0;
-	asm volatile(
+	asm_inline volatile(
 		"	diag	%[r1],%[subcode],0x308\n"
 		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
@@ -180,7 +199,7 @@ static inline int __diag308(unsigned long subcode, void *addr)
 int diag308(unsigned long subcode, void *addr)
 {
 	diag_stat_inc(DIAG_STAT_X308);
-	return __diag308(subcode, addr);
+	return __diag308(subcode, addr ? virt_to_phys(addr) : 0);
 }
 EXPORT_SYMBOL_GPL(diag308);
 
@@ -191,7 +210,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj,	\
 		struct kobj_attribute *attr,				\
 		char *page)						\
 {									\
-	return scnprintf(page, PAGE_SIZE, _format, ##args);		\
+	return sysfs_emit(page, _format, ##args);			\
 }
 
 #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk)			\
@@ -217,14 +236,14 @@ IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n",				\
 		 _ipl_blk.ssid, _ipl_blk.devno);			\
 IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk);			\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
-	__ATTR(_name, (S_IRUGO | S_IWUSR),				\
+	__ATTR(_name, 0644,						\
 	       sys_##_prefix##_##_name##_show,				\
 	       sys_##_prefix##_##_name##_store)				\
 
 #define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
 IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value)			\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
-	__ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL)
+	__ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL)
 
 #define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)	\
 IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)	\
@@ -239,7 +258,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 	return len;							\
 }									\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
-	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
+	__ATTR(_name, 0644,						\
 			sys_##_prefix##_##_name##_show,			\
 			sys_##_prefix##_##_name##_store)
 
@@ -249,15 +268,74 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
 		struct kobj_attribute *attr,				\
 		const char *buf, size_t len)				\
 {									\
-	strncpy(_value, buf, sizeof(_value) - 1);			\
+	if (len >= sizeof(_value))					\
+		return -E2BIG;						\
+	len = strscpy(_value, buf);					\
+	if ((ssize_t)len < 0)						\
+		return len;						\
 	strim(_value);							\
 	return len;							\
 }									\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
-	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
+	__ATTR(_name, 0644,						\
 			sys_##_prefix##_##_name##_show,			\
 			sys_##_prefix##_##_name##_store)
 
+#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block)				\
+static ssize_t sys_##_prefix##_scp_data_show(struct file *filp,			\
+					    struct kobject *kobj,		\
+					    const struct bin_attribute *attr,	\
+					    char *buf, loff_t off,		\
+					    size_t count)			\
+{										\
+	size_t size = _ipl_block.scp_data_len;					\
+	void *scp_data = _ipl_block.scp_data;					\
+										\
+	return memory_read_from_buffer(buf, count, &off,			\
+				       scp_data, size);				\
+}
+
+#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static ssize_t sys_##_prefix##_scp_data_store(struct file *filp,		\
+					struct kobject *kobj,			\
+					const struct bin_attribute *attr,	\
+					char *buf, loff_t off,			\
+					size_t count)				\
+{										\
+	size_t scpdata_len = count;						\
+	size_t padding;								\
+										\
+	if (off)								\
+		return -EINVAL;							\
+										\
+	memcpy(_ipl_block.scp_data, buf, count);				\
+	if (scpdata_len % 8) {							\
+		padding = 8 - (scpdata_len % 8);				\
+		memset(_ipl_block.scp_data + scpdata_len,			\
+		       0, padding);						\
+		scpdata_len += padding;						\
+	}									\
+										\
+	_ipl_block_hdr.len = _ipl_bp_len + scpdata_len;				\
+	_ipl_block.len = _ipl_bp0_len + scpdata_len;				\
+	_ipl_block.scp_data_len = scpdata_len;					\
+										\
+	return count;								\
+}
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size)		\
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block)				\
+static const struct bin_attribute sys_##_prefix##_scp_data_attr =	\
+	__BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show,	\
+		   NULL, _size)
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block)					\
+IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static const struct bin_attribute sys_##_prefix##_scp_data_attr =		\
+	__BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show,		\
+		   sys_##_prefix##_scp_data_store, _size)
+
 /*
  * ipl section
  */
@@ -280,6 +358,11 @@ static __init enum ipl_type get_ipl_type(void)
 			return IPL_TYPE_NVME_DUMP;
 		else
 			return IPL_TYPE_NVME;
+	case IPL_PBT_ECKD:
+		if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP)
+			return IPL_TYPE_ECKD_DUMP;
+		else
+			return IPL_TYPE_ECKD;
 	}
 	return IPL_TYPE_UNKNOWN;
 }
@@ -290,7 +373,7 @@ EXPORT_SYMBOL_GPL(ipl_info);
 static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
 			     char *page)
 {
-	return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+	return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type));
 }
 
 static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
@@ -298,7 +381,7 @@ static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
 static ssize_t ipl_secure_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%i\n", !!ipl_secure_flag);
+	return sysfs_emit(page, "%i\n", !!ipl_secure_flag);
 }
 
 static struct kobj_attribute sys_ipl_secure_attr =
@@ -307,7 +390,7 @@ static struct kobj_attribute sys_ipl_secure_attr =
 static ssize_t ipl_has_secure_show(struct kobject *kobj,
 				   struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%i\n", !!sclp.has_sipl);
+	return sysfs_emit(page, "%i\n", !!sclp.has_sipl);
 }
 
 static struct kobj_attribute sys_ipl_has_secure_attr =
@@ -320,79 +403,69 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj,
 
 	if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
 		ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
-	return sprintf(page, "%s\n", parm);
+	return sysfs_emit(page, "%s\n", parm);
 }
 
 static struct kobj_attribute sys_ipl_vm_parm_attr =
-	__ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+	__ATTR(parm, 0444, ipl_vm_parm_show, NULL);
 
 static ssize_t sys_ipl_device_show(struct kobject *kobj,
 				   struct kobj_attribute *attr, char *page)
 {
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
-		return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
-			       ipl_block.ccw.devno);
+		return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+				  ipl_block.ccw.devno);
+	case IPL_TYPE_ECKD:
+	case IPL_TYPE_ECKD_DUMP:
+		return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
+				  ipl_block.eckd.devno);
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
-		return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+		return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno);
 	case IPL_TYPE_NVME:
 	case IPL_TYPE_NVME_DUMP:
-		return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
+		return sysfs_emit(page, "%08x\n", ipl_block.nvme.fid);
 	default:
 		return 0;
 	}
 }
 
 static struct kobj_attribute sys_ipl_device_attr =
-	__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+	__ATTR(device, 0444, sys_ipl_device_show, NULL);
 
-static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *attr, char *buf,
-				  loff_t off, size_t count)
+static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj,
+				      const struct bin_attribute *attr, char *buf,
+				      loff_t off, size_t count)
 {
 	return memory_read_from_buffer(buf, count, &off, &ipl_block,
 				       ipl_block.hdr.len);
 }
-static struct bin_attribute ipl_parameter_attr =
-	__BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
+static const struct bin_attribute sys_ipl_parameter_attr =
+	__BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL,
 		   PAGE_SIZE);
 
-static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t off, size_t count)
-{
-	unsigned int size = ipl_block.fcp.scp_data_len;
-	void *scp_data = &ipl_block.fcp.scp_data;
-
-	return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t off, size_t count)
-{
-	unsigned int size = ipl_block.nvme.scp_data_len;
-	void *scp_data = &ipl_block.nvme.scp_data;
-
-	return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE);
 
-static struct bin_attribute ipl_scp_data_attr =
-	__BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+static const struct bin_attribute *const ipl_fcp_bin_attrs[] = {
+	&sys_ipl_parameter_attr,
+	&sys_ipl_fcp_scp_data_attr,
+	NULL,
+};
 
-static struct bin_attribute ipl_nvme_scp_data_attr =
-	__BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE);
 
-static struct bin_attribute *ipl_fcp_bin_attrs[] = {
-	&ipl_parameter_attr,
-	&ipl_scp_data_attr,
+static const struct bin_attribute *const ipl_nvme_bin_attrs[] = {
+	&sys_ipl_parameter_attr,
+	&sys_ipl_nvme_scp_data_attr,
 	NULL,
 };
 
-static struct bin_attribute *ipl_nvme_bin_attrs[] = {
-	&ipl_parameter_attr,
-	&ipl_nvme_scp_data_attr,
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE);
+
+static const struct bin_attribute *const ipl_eckd_bin_attrs[] = {
+	&sys_ipl_parameter_attr,
+	&sys_ipl_eckd_scp_data_attr,
 	NULL,
 };
 
@@ -417,103 +490,180 @@ DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
 DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
 		   (unsigned long long)ipl_block.nvme.br_lba);
 
+/* ECKD ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n",
+		   (unsigned long long)ipl_block.eckd.bootprog);
+
+#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb)				\
+static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj,		\
+					  struct kobj_attribute *attr,	\
+					  char *buf)			\
+{									\
+	struct ipl_pb0_eckd *ipb = &(_ipb);				\
+									\
+	if (!ipb->br_chr.cyl &&						\
+	    !ipb->br_chr.head &&					\
+	    !ipb->br_chr.record)					\
+		return sysfs_emit(buf, "auto\n");			\
+									\
+	return sysfs_emit(buf, "0x%x,0x%x,0x%x\n",			\
+			  ipb->br_chr.cyl,				\
+			  ipb->br_chr.head,				\
+			  ipb->br_chr.record);				\
+}
+
+#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb)				\
+static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj,	\
+					   struct kobj_attribute *attr,	\
+					   const char *buf, size_t len)	\
+{									\
+	struct ipl_pb0_eckd *ipb = &(_ipb);				\
+	unsigned long args[3] = { 0 };					\
+	char *p, *p1, *tmp = NULL;					\
+	int i, rc;							\
+									\
+	if (!strncmp(buf, "auto", 4))	/* "auto" stores 0,0,0 */	\
+		goto out;						\
+									\
+	tmp = kstrdup(buf, GFP_KERNEL);	/* strsep() modifies its input */ \
+	if (!tmp)							\
+		return -ENOMEM;						\
+	p = tmp;							\
+	for (i = 0; i < 3; i++) {					\
+		p1 = strsep(&p, ", ");					\
+		if (!p1) {						\
+			rc = -EINVAL;					\
+			goto err;					\
+		}							\
+		rc = kstrtoul(p1, 0, args + i);				\
+		if (rc)							\
+			goto err;					\
+	}								\
+									\
+	rc = -EINVAL;							\
+	/* non-zero cylinder or head requires a non-zero record */	\
+	if ((args[0] || args[1]) && !args[2])				\
+		goto err;						\
+									\
+	if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255)	\
+		goto err;						\
+									\
+out:									\
+	ipb->br_chr.cyl = args[0];					\
+	ipb->br_chr.head = args[1];					\
+	ipb->br_chr.record = args[2];					\
+	rc = len;							\
+err:									\
+	kfree(tmp);							\
+	return rc;							\
+}
+
+IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd);
+static struct kobj_attribute sys_ipl_eckd_br_chr_attr =
+	__ATTR(br_chr, 0444, eckd_ipl_br_chr_show, NULL); /* no store handler: read-only */
+
+IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd);
+
+static struct kobj_attribute sys_reipl_eckd_br_chr_attr =
+	__ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store);
+
 static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
 				     struct kobj_attribute *attr, char *page)
 {
 	char loadparm[LOADPARM_LEN + 1] = {};
 
 	if (!sclp_ipl_info.is_valid)
-		return sprintf(page, "#unknown#\n");
+		return sysfs_emit(page, "#unknown#\n");
 	memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
 	EBCASC(loadparm, LOADPARM_LEN);
 	strim(loadparm);
-	return sprintf(page, "%s\n", loadparm);
+	return sysfs_emit(page, "%s\n", loadparm);
 }
 
 static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
 	__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
 
 static struct attribute *ipl_fcp_attrs[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_fcp_wwpn_attr.attr,
 	&sys_ipl_fcp_lun_attr.attr,
 	&sys_ipl_fcp_bootprog_attr.attr,
 	&sys_ipl_fcp_br_lba_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
-static struct attribute_group ipl_fcp_attr_group = {
+static const struct attribute_group ipl_fcp_attr_group = {
 	.attrs = ipl_fcp_attrs,
-	.bin_attrs = ipl_fcp_bin_attrs,
+	.bin_attrs_new = ipl_fcp_bin_attrs,
 };
 
 static struct attribute *ipl_nvme_attrs[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_nvme_fid_attr.attr,
 	&sys_ipl_nvme_nsid_attr.attr,
 	&sys_ipl_nvme_bootprog_attr.attr,
 	&sys_ipl_nvme_br_lba_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
-static struct attribute_group ipl_nvme_attr_group = {
+static const struct attribute_group ipl_nvme_attr_group = {
 	.attrs = ipl_nvme_attrs,
-	.bin_attrs = ipl_nvme_bin_attrs,
+	.bin_attrs_new = ipl_nvme_bin_attrs,
 };
 
+static struct attribute *ipl_eckd_attrs[] = {
+	&sys_ipl_eckd_bootprog_attr.attr,
+	&sys_ipl_eckd_br_chr_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_device_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ipl_eckd_attr_group = {
+	.attrs = ipl_eckd_attrs,
+	.bin_attrs_new = ipl_eckd_bin_attrs,
+};
 
 /* CCW ipl device attributes */
 
 static struct attribute *ipl_ccw_attrs_vm[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
 	&sys_ipl_vm_parm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
 static struct attribute *ipl_ccw_attrs_lpar[] = {
-	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
-	&sys_ipl_secure_attr.attr,
-	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
-static struct attribute_group ipl_ccw_attr_group_vm = {
+static const struct attribute_group ipl_ccw_attr_group_vm = {
 	.attrs = ipl_ccw_attrs_vm,
 };
 
-static struct attribute_group ipl_ccw_attr_group_lpar = {
+static const struct attribute_group ipl_ccw_attr_group_lpar = {
 	.attrs = ipl_ccw_attrs_lpar
 };
 
-/* UNKNOWN ipl device attributes */
-
-static struct attribute *ipl_unknown_attrs[] = {
+static struct attribute *ipl_common_attrs[] = {
 	&sys_ipl_type_attr.attr,
+	&sys_ipl_secure_attr.attr,
+	&sys_ipl_has_secure_attr.attr,
 	NULL,
 };
 
-static struct attribute_group ipl_unknown_attr_group = {
-	.attrs = ipl_unknown_attrs,
+static const struct attribute_group ipl_common_attr_group = {
+	.attrs = ipl_common_attrs,
 };
 
 static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	__bpon();
 	diag308(DIAG308_LOAD_CLEAR, NULL);
 }
 
@@ -531,15 +681,22 @@ static int __init ipl_init(void)
 		rc = -ENOMEM;
 		goto out;
 	}
+	rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group);
+	if (rc)
+		goto out;
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
-		if (MACHINE_IS_VM)
+		if (machine_is_vm())
 			rc = sysfs_create_group(&ipl_kset->kobj,
 						&ipl_ccw_attr_group_vm);
 		else
 			rc = sysfs_create_group(&ipl_kset->kobj,
 						&ipl_ccw_attr_group_lpar);
 		break;
+	case IPL_TYPE_ECKD:
+	case IPL_TYPE_ECKD_DUMP:
+		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
+		break;
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
 		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
@@ -549,8 +706,6 @@ static int __init ipl_init(void)
 		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
 		break;
 	default:
-		rc = sysfs_create_group(&ipl_kset->kobj,
-					&ipl_unknown_attr_group);
 		break;
 	}
 out:
@@ -577,7 +732,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
 	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
 
 	ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
-	return sprintf(page, "%s\n", vmparm);
+	return sysfs_emit(page, "%s\n", vmparm);
 }
 
 static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
@@ -641,54 +796,20 @@ static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
 }
 
 static struct kobj_attribute sys_reipl_nss_vmparm_attr =
-	__ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
-					reipl_nss_vmparm_store);
+	__ATTR(parm, 0644, reipl_nss_vmparm_show,
+	       reipl_nss_vmparm_store);
 static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
-	__ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
-					reipl_ccw_vmparm_store);
+	__ATTR(parm, 0644, reipl_ccw_vmparm_show,
+	       reipl_ccw_vmparm_store);
 
 /* FCP reipl device attributes */
 
-static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
-				      struct bin_attribute *attr,
-				      char *buf, loff_t off, size_t count)
-{
-	size_t size = reipl_block_fcp->fcp.scp_data_len;
-	void *scp_data = reipl_block_fcp->fcp.scp_data;
-
-	return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
-				       struct bin_attribute *attr,
-				       char *buf, loff_t off, size_t count)
-{
-	size_t scpdata_len = count;
-	size_t padding;
-
-
-	if (off)
-		return -EINVAL;
-
-	memcpy(reipl_block_fcp->fcp.scp_data, buf, count);
-	if (scpdata_len % 8) {
-		padding = 8 - (scpdata_len % 8);
-		memset(reipl_block_fcp->fcp.scp_data + scpdata_len,
-		       0, padding);
-		scpdata_len += padding;
-	}
-
-	reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
-	reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len;
-	reipl_block_fcp->fcp.scp_data_len = scpdata_len;
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr,
+			    reipl_block_fcp->fcp,
+			    IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+			    DIAG308_SCPDATA_SIZE);
 
-	return count;
-}
-static struct bin_attribute sys_reipl_fcp_scp_data_attr =
-	__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
-		   reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
-
-static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const reipl_fcp_bin_attrs[] = {
 	&sys_reipl_fcp_scp_data_attr,
 	NULL,
 };
@@ -719,7 +840,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
 	char buf[LOADPARM_LEN + 1];
 
 	reipl_get_ascii_loadparm(buf, ipb);
-	return sprintf(page, "%s\n", buf);
+	return sysfs_emit(page, "%s\n", buf);
 }
 
 static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
@@ -750,35 +871,39 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
 	return len;
 }
 
-/* FCP wrapper */
-static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
-{
-	return reipl_generic_loadparm_show(reipl_block_fcp, page);
-}
-
-static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
-					struct kobj_attribute *attr,
-					const char *buf, size_t len)
-{
-	return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
-	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
-					    reipl_fcp_loadparm_store);
+#define DEFINE_GENERIC_LOADPARM(name)							\
+static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj,			\
+					    struct kobj_attribute *attr, char *page)	\
+{											\
+	return reipl_generic_loadparm_show(reipl_block_##name, page);			\
+}											\
+static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj,			\
+					     struct kobj_attribute *attr,		\
+					     const char *buf, size_t len)		\
+{											\
+	return reipl_generic_loadparm_store(reipl_block_##name, buf, len);		\
+}											\
+static struct kobj_attribute sys_reipl_##name##_loadparm_attr =				\
+	__ATTR(loadparm, 0644, reipl_##name##_loadparm_show,				\
+	       reipl_##name##_loadparm_store)
+
+DEFINE_GENERIC_LOADPARM(fcp);
+DEFINE_GENERIC_LOADPARM(nvme);
+DEFINE_GENERIC_LOADPARM(ccw);
+DEFINE_GENERIC_LOADPARM(nss);
+DEFINE_GENERIC_LOADPARM(eckd);
 
 static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
 				    struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%u\n", reipl_fcp_clear);
+	return sysfs_emit(page, "%u\n", reipl_fcp_clear);
 }
 
 static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
 				     struct kobj_attribute *attr,
 				     const char *buf, size_t len)
 {
-	if (strtobool(buf, &reipl_fcp_clear) < 0)
+	if (kstrtobool(buf, &reipl_fcp_clear) < 0)
 		return -EINVAL;
 	return len;
 }
@@ -793,9 +918,9 @@ static struct attribute *reipl_fcp_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group reipl_fcp_attr_group = {
+static const struct attribute_group reipl_fcp_attr_group = {
 	.attrs = reipl_fcp_attrs,
-	.bin_attrs = reipl_fcp_bin_attrs,
+	.bin_attrs_new = reipl_fcp_bin_attrs,
 };
 
 static struct kobj_attribute sys_reipl_fcp_clear_attr =
@@ -803,46 +928,12 @@ static struct kobj_attribute sys_reipl_fcp_clear_attr =
 
 /* NVME reipl device attributes */
 
-static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
-				      struct bin_attribute *attr,
-				      char *buf, loff_t off, size_t count)
-{
-	size_t size = reipl_block_nvme->nvme.scp_data_len;
-	void *scp_data = reipl_block_nvme->nvme.scp_data;
-
-	return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
-				       struct bin_attribute *attr,
-				       char *buf, loff_t off, size_t count)
-{
-	size_t scpdata_len = count;
-	size_t padding;
-
-	if (off)
-		return -EINVAL;
-
-	memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
-	if (scpdata_len % 8) {
-		padding = 8 - (scpdata_len % 8);
-		memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
-		       0, padding);
-		scpdata_len += padding;
-	}
-
-	reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
-	reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
-	reipl_block_nvme->nvme.scp_data_len = scpdata_len;
-
-	return count;
-}
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr,
+			    reipl_block_nvme->nvme,
+			    IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+			    DIAG308_SCPDATA_SIZE);
 
-static struct bin_attribute sys_reipl_nvme_scp_data_attr =
-	__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read,
-		   reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
-
-static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const reipl_nvme_bin_attrs[] = {
 	&sys_reipl_nvme_scp_data_attr,
 	NULL,
 };
@@ -856,24 +947,6 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
 DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
 		   reipl_block_nvme->nvme.br_lba);
 
-/* nvme wrapper */
-static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
-{
-	return reipl_generic_loadparm_show(reipl_block_nvme, page);
-}
-
-static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
-					struct kobj_attribute *attr,
-					const char *buf, size_t len)
-{
-	return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
-	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show,
-					    reipl_nvme_loadparm_store);
-
 static struct attribute *reipl_nvme_attrs[] = {
 	&sys_reipl_nvme_fid_attr.attr,
 	&sys_reipl_nvme_nsid_attr.attr,
@@ -883,22 +956,22 @@ static struct attribute *reipl_nvme_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group reipl_nvme_attr_group = {
+static const struct attribute_group reipl_nvme_attr_group = {
 	.attrs = reipl_nvme_attrs,
-	.bin_attrs = reipl_nvme_bin_attrs
+	.bin_attrs_new = reipl_nvme_bin_attrs
 };
 
 static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
 				     struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%u\n", reipl_nvme_clear);
+	return sysfs_emit(page, "%u\n", reipl_nvme_clear);
 }
 
 static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
 				      struct kobj_attribute *attr,
 				      const char *buf, size_t len)
 {
-	if (strtobool(buf, &reipl_nvme_clear) < 0)
+	if (kstrtobool(buf, &reipl_nvme_clear) < 0)
 		return -EINVAL;
 	return len;
 }
@@ -909,49 +982,17 @@ static struct kobj_attribute sys_reipl_nvme_clear_attr =
 /* CCW reipl device attributes */
 DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
 
-/* NSS wrapper */
-static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
-{
-	return reipl_generic_loadparm_show(reipl_block_nss, page);
-}
-
-static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
-					struct kobj_attribute *attr,
-					const char *buf, size_t len)
-{
-	return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
-}
-
-/* CCW wrapper */
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
-{
-	return reipl_generic_loadparm_show(reipl_block_ccw, page);
-}
-
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
-					struct kobj_attribute *attr,
-					const char *buf, size_t len)
-{
-	return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
-	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
-					    reipl_ccw_loadparm_store);
-
 static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
 				    struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%u\n", reipl_ccw_clear);
+	return sysfs_emit(page, "%u\n", reipl_ccw_clear);
 }
 
 static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
 				     struct kobj_attribute *attr,
 				     const char *buf, size_t len)
 {
-	if (strtobool(buf, &reipl_ccw_clear) < 0)
+	if (kstrtobool(buf, &reipl_ccw_clear) < 0)
 		return -EINVAL;
 	return len;
 }
@@ -984,6 +1025,52 @@ static struct attribute_group reipl_ccw_attr_group_lpar = {
 	.attrs = reipl_ccw_attrs_lpar,
 };
 
+/* ECKD reipl device attributes */
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr,
+			    reipl_block_eckd->eckd,
+			    IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+			    DIAG308_SCPDATA_SIZE);
+
+static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
+	&sys_reipl_eckd_scp_data_attr,
+	NULL,
+};
+
+DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_eckd->eckd.bootprog);
+
+static struct attribute *reipl_eckd_attrs[] = {
+	&sys_reipl_eckd_device_attr.attr,
+	&sys_reipl_eckd_bootprog_attr.attr,
+	&sys_reipl_eckd_br_chr_attr.attr,
+	&sys_reipl_eckd_loadparm_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group reipl_eckd_attr_group = {
+	.attrs = reipl_eckd_attrs,
+	.bin_attrs_new = reipl_eckd_bin_attrs
+};
+
+static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return sysfs_emit(page, "%u\n", reipl_eckd_clear);
+}
+
+static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	if (kstrtobool(buf, &reipl_eckd_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_eckd_clear_attr =
+	__ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store);
 
 /* NSS reipl device attributes */
 static void reipl_get_ascii_nss_name(char *dst,
@@ -1000,7 +1087,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj,
 	char nss_name[NSS_NAME_SIZE + 1] = {};
 
 	reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
-	return sprintf(page, "%s\n", nss_name);
+	return sysfs_emit(page, "%s\n", nss_name);
 }
 
 static ssize_t reipl_nss_name_store(struct kobject *kobj,
@@ -1031,12 +1118,8 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj,
 }
 
 static struct kobj_attribute sys_reipl_nss_name_attr =
-	__ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
-					reipl_nss_name_store);
-
-static struct kobj_attribute sys_reipl_nss_loadparm_attr =
-	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
-					    reipl_nss_loadparm_store);
+	__ATTR(name, 0644, reipl_nss_name_show,
+	       reipl_nss_name_store);
 
 static struct attribute *reipl_nss_attrs[] = {
 	&sys_reipl_nss_name_attr.attr,
@@ -1052,8 +1135,8 @@ static struct attribute_group reipl_nss_attr_group = {
 
 void set_os_info_reipl_block(void)
 {
-	os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
-			  reipl_block_actual->hdr.len);
+	os_info_entry_add_data(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+			       reipl_block_actual->hdr.len);
 }
 
 /* reipl type */
@@ -1067,6 +1150,9 @@ static int reipl_set_type(enum ipl_type type)
 	case IPL_TYPE_CCW:
 		reipl_block_actual = reipl_block_ccw;
 		break;
+	case IPL_TYPE_ECKD:
+		reipl_block_actual = reipl_block_eckd;
+		break;
 	case IPL_TYPE_FCP:
 		reipl_block_actual = reipl_block_fcp;
 		break;
@@ -1086,7 +1172,7 @@ static int reipl_set_type(enum ipl_type type)
 static ssize_t reipl_type_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+	return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type));
 }
 
 static ssize_t reipl_type_store(struct kobject *kobj,
@@ -1097,6 +1183,8 @@ static ssize_t reipl_type_store(struct kobject *kobj,
 
 	if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
 		rc = reipl_set_type(IPL_TYPE_CCW);
+	else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_ECKD);
 	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
 		rc = reipl_set_type(IPL_TYPE_FCP);
 	else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
@@ -1112,6 +1200,7 @@ static struct kobj_attribute reipl_type_attr =
 static struct kset *reipl_kset;
 static struct kset *reipl_fcp_kset;
 static struct kset *reipl_nvme_kset;
+static struct kset *reipl_eckd_kset;
 
 static void __reipl_run(void *unused)
 {
@@ -1123,6 +1212,13 @@ static void __reipl_run(void *unused)
 		else
 			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 		break;
+	case IPL_TYPE_ECKD:
+		diag308(DIAG308_SET, reipl_block_eckd);
+		if (reipl_eckd_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
+		break;
 	case IPL_TYPE_FCP:
 		diag308(DIAG308_SET, reipl_block_fcp);
 		if (reipl_fcp_clear)
@@ -1146,6 +1242,7 @@ static void __reipl_run(void *unused)
 		break;
 	case IPL_TYPE_FCP_DUMP:
 	case IPL_TYPE_NVME_DUMP:
+	case IPL_TYPE_ECKD_DUMP:
 		break;
 	}
 	disabled_wait();
@@ -1176,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
 	ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
 
 	/* VM PARM */
-	if (MACHINE_IS_VM && ipl_block_valid &&
+	if (machine_is_vm() && ipl_block_valid &&
 	    (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
 
 		ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
@@ -1190,7 +1287,7 @@ static int __init reipl_nss_init(void)
 {
 	int rc;
 
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 0;
 
 	reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
@@ -1215,8 +1312,8 @@ static int __init reipl_ccw_init(void)
 		return -ENOMEM;
 
 	rc = sysfs_create_group(&reipl_kset->kobj,
-				MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
-					      : &reipl_ccw_attr_group_lpar);
+				machine_is_vm() ? &reipl_ccw_attr_group_vm
+						: &reipl_ccw_attr_group_lpar);
 	if (rc)
 		return rc;
 
@@ -1343,6 +1440,58 @@ out1:
 	return rc;
 }
 
+static int __init reipl_eckd_init(void)
+{
+	int rc;
+
+	if (!sclp.has_sipl_eckd)
+		return 0;
+
+	reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_eckd)
+		return -ENOMEM;
+
+	/* sysfs: create kset for mixing attr group and bin attrs */
+	reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL,
+					      &reipl_kset->kobj);
+	if (!reipl_eckd_kset) {
+		free_page((unsigned long)reipl_block_eckd);
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_eckd_kset->kobj,
+				       &sys_reipl_eckd_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else {
+		reipl_eckd_clear = true;
+	}
+
+	if (ipl_info.type == IPL_TYPE_ECKD) {
+		memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block));
+	} else {
+		reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+		reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+		reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+		reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_ECKD;
+	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+out1:
+	kset_unregister(reipl_eckd_kset);
+	free_page((unsigned long)reipl_block_eckd);
+	return rc;
+}
+
 static int __init reipl_type_init(void)
 {
 	enum ipl_type reipl_type = ipl_info.type;
@@ -1364,6 +1513,9 @@ static int __init reipl_type_init(void)
 	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
 		memcpy(reipl_block_ccw, reipl_block, size);
 		reipl_type = IPL_TYPE_CCW;
+	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) {
+		memcpy(reipl_block_eckd, reipl_block, size);
+		reipl_type = IPL_TYPE_ECKD;
 	}
 out:
 	return reipl_set_type(reipl_type);
@@ -1384,6 +1536,9 @@ static int __init reipl_init(void)
 	rc = reipl_ccw_init();
 	if (rc)
 		return rc;
+	rc = reipl_eckd_init();
+	if (rc)
+		return rc;
 	rc = reipl_fcp_init();
 	if (rc)
 		return rc;
@@ -1419,6 +1574,11 @@ DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
 DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
 		   dump_block_fcp->fcp.devno);
 
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
+			    dump_block_fcp->fcp,
+			    IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+			    DIAG308_SCPDATA_SIZE);
+
 static struct attribute *dump_fcp_attrs[] = {
 	&sys_dump_fcp_device_attr.attr,
 	&sys_dump_fcp_wwpn_attr.attr,
@@ -1428,9 +1588,15 @@ static struct attribute *dump_fcp_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group dump_fcp_attr_group = {
+static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
+	&sys_dump_fcp_scp_data_attr,
+	NULL,
+};
+
+static const struct attribute_group dump_fcp_attr_group = {
 	.name  = IPL_FCP_STR,
 	.attrs = dump_fcp_attrs,
+	.bin_attrs_new = dump_fcp_bin_attrs,
 };
 
 /* NVME dump device attributes */
@@ -1443,6 +1609,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
 DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
 		   dump_block_nvme->nvme.br_lba);
 
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
+			    dump_block_nvme->nvme,
+			    IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+			    DIAG308_SCPDATA_SIZE);
+
 static struct attribute *dump_nvme_attrs[] = {
 	&sys_dump_nvme_fid_attr.attr,
 	&sys_dump_nvme_nsid_attr.attr,
@@ -1451,9 +1622,49 @@ static struct attribute *dump_nvme_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group dump_nvme_attr_group = {
+static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
+	&sys_dump_nvme_scp_data_attr,
+	NULL,
+};
+
+static const struct attribute_group dump_nvme_attr_group = {
 	.name  = IPL_NVME_STR,
 	.attrs = dump_nvme_attrs,
+	.bin_attrs_new = dump_nvme_bin_attrs,
+};
+
+/* ECKD dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+		   dump_block_eckd->eckd.bootprog);
+
+IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
+
+static struct kobj_attribute sys_dump_eckd_br_chr_attr =
+	__ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store);
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_eckd, dump_block_eckd->hdr,
+			    dump_block_eckd->eckd,
+			    IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+			    DIAG308_SCPDATA_SIZE);
+
+static struct attribute *dump_eckd_attrs[] = {
+	&sys_dump_eckd_device_attr.attr,
+	&sys_dump_eckd_bootprog_attr.attr,
+	&sys_dump_eckd_br_chr_attr.attr,
+	NULL,
+};
+
+static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
+	&sys_dump_eckd_scp_data_attr,
+	NULL,
+};
+
+static const struct attribute_group dump_eckd_attr_group = {
+	.name  = IPL_ECKD_STR,
+	.attrs = dump_eckd_attrs,
+	.bin_attrs_new = dump_eckd_bin_attrs,
 };
 
 /* CCW dump device attributes */
@@ -1482,7 +1693,7 @@ static int dump_set_type(enum dump_type type)
 static ssize_t dump_type_show(struct kobject *kobj,
 			      struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", dump_type_str(dump_type));
+	return sysfs_emit(page, "%s\n", dump_type_str(dump_type));
 }
 
 static ssize_t dump_type_store(struct kobject *kobj,
@@ -1495,6 +1706,8 @@ static ssize_t dump_type_store(struct kobject *kobj,
 		rc = dump_set_type(DUMP_TYPE_NONE);
 	else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
 		rc = dump_set_type(DUMP_TYPE_CCW);
+	else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_ECKD);
 	else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
 		rc = dump_set_type(DUMP_TYPE_FCP);
 	else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
@@ -1505,6 +1718,24 @@ static ssize_t dump_type_store(struct kobject *kobj,
 static struct kobj_attribute dump_type_attr =
 	__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
 
+static ssize_t dump_area_size_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	return sysfs_emit(page, "%lu\n", sclp.hsa_size);
+}
+
+static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size);
+
+static struct attribute *dump_attrs[] = {
+	&dump_type_attr.attr,
+	&dump_area_size_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group dump_attr_group = {
+	.attrs = dump_attrs,
+};
+
 static struct kset *dump_kset;
 
 static void diag308_dump(void *dump_block)
@@ -1523,6 +1754,9 @@ static void __dump_run(void *unused)
 	case DUMP_TYPE_CCW:
 		diag308_dump(dump_block_ccw);
 		break;
+	case DUMP_TYPE_ECKD:
+		diag308_dump(dump_block_eckd);
+		break;
 	case DUMP_TYPE_FCP:
 		diag308_dump(dump_block_fcp);
 		break;
@@ -1601,13 +1835,36 @@ static int __init dump_nvme_init(void)
 	}
 	dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
 	dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
-	dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
-	dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
-	dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+	dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+	dump_block_nvme->nvme.pbt = IPL_PBT_NVME;
+	dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP;
 	dump_capabilities |= DUMP_TYPE_NVME;
 	return 0;
 }
 
+static int __init dump_eckd_init(void)
+{
+	int rc;
+
+	if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd)
+		return 0; /* LDIPL DUMP is not installed */
+	dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_eckd)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_eckd);
+		return rc;
+	}
+	dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+	dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+	dump_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+	dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP;
+	dump_capabilities |= DUMP_TYPE_ECKD;
+	return 0;
+}
+
 static int __init dump_init(void)
 {
 	int rc;
@@ -1615,7 +1872,7 @@ static int __init dump_init(void)
 	dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
 	if (!dump_kset)
 		return -ENOMEM;
-	rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group);
 	if (rc) {
 		kset_unregister(dump_kset);
 		return rc;
@@ -1623,6 +1880,9 @@ static int __init dump_init(void)
 	rc = dump_ccw_init();
 	if (rc)
 		return rc;
+	rc = dump_eckd_init();
+	if (rc)
+		return rc;
 	rc = dump_fcp_init();
 	if (rc)
 		return rc;
@@ -1641,13 +1901,28 @@ static struct shutdown_action __refdata dump_action = {
 
 static void dump_reipl_run(struct shutdown_trigger *trigger)
 {
-	unsigned long ipib = (unsigned long) reipl_block_actual;
+	struct lowcore *abs_lc;
 	unsigned int csum;
 
-	csum = (__force unsigned int)
-	       csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
-	put_abs_lowcore(ipib, ipib);
-	put_abs_lowcore(ipib_checksum, csum);
+	/*
+	 * Set REIPL_CLEAR flag in os_info flags entry indicating
+	 * 'clear' sysfs attribute has been set on the panicked system
+	 * for specified reipl type.
+	 * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+	 */
+	if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+	    (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+	    (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+	    (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+	    reipl_type == IPL_TYPE_NSS ||
+	    reipl_type == IPL_TYPE_UNKNOWN)
+		os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+	os_info_entry_add_data(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
+	csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+	abs_lc = get_abs_lowcore();
+	abs_lc->ipib = __pa(reipl_block_actual);
+	abs_lc->ipib_checksum = csum;
+	put_abs_lowcore(abs_lc);
 	dump_run(trigger);
 }
 
@@ -1660,11 +1935,13 @@ static struct shutdown_action __refdata dump_reipl_action = {
  * vmcmd shutdown action: Trigger vm command on shutdown.
  */
 
-static char vmcmd_on_reboot[128];
-static char vmcmd_on_panic[128];
-static char vmcmd_on_halt[128];
-static char vmcmd_on_poff[128];
-static char vmcmd_on_restart[128];
+#define VMCMD_MAX_SIZE	240
+
+static char vmcmd_on_reboot[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_panic[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_halt[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_poff[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_restart[VMCMD_MAX_SIZE + 1];
 
 DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
 DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
@@ -1711,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
 
 static int vmcmd_init(void)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return -EOPNOTSUPP;
 	vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
 	if (!vmcmd_kset)
@@ -1776,7 +2053,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
 static ssize_t on_reboot_show(struct kobject *kobj,
 			      struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+	return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name);
 }
 
 static ssize_t on_reboot_store(struct kobject *kobj,
@@ -1802,7 +2079,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
 static ssize_t on_panic_show(struct kobject *kobj,
 			     struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", on_panic_trigger.action->name);
+	return sysfs_emit(page, "%s\n", on_panic_trigger.action->name);
 }
 
 static ssize_t on_panic_store(struct kobject *kobj,
@@ -1828,7 +2105,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
 static ssize_t on_restart_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", on_restart_trigger.action->name);
+	return sysfs_emit(page, "%s\n", on_restart_trigger.action->name);
 }
 
 static ssize_t on_restart_store(struct kobject *kobj,
@@ -1854,7 +2131,7 @@ void do_restart(void *arg)
 	tracing_off();
 	debug_locks_off();
 	lgr_info_log();
-	smp_call_online_cpu(__do_restart, arg);
+	smp_call_ipl_cpu(__do_restart, arg);
 }
 
 /* on halt */
@@ -1864,7 +2141,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
 static ssize_t on_halt_show(struct kobject *kobj,
 			    struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", on_halt_trigger.action->name);
+	return sysfs_emit(page, "%s\n", on_halt_trigger.action->name);
 }
 
 static ssize_t on_halt_store(struct kobject *kobj,
@@ -1890,7 +2167,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
 static ssize_t on_poff_show(struct kobject *kobj,
 			    struct kobj_attribute *attr, char *page)
 {
-	return sprintf(page, "%s\n", on_poff_trigger.action->name);
+	return sysfs_emit(page, "%s\n", on_poff_trigger.action->name);
 }
 
 static ssize_t on_poff_store(struct kobject *kobj,
@@ -1972,26 +2249,28 @@ static int __init s390_ipl_init(void)
 
 __initcall(s390_ipl_init);
 
-static void __init strncpy_skip_quote(char *dst, char *src, int n)
+static void __init strscpy_skip_quote(char *dst, char *src, int n)
 {
 	int sx, dx;
 
-	dx = 0;
-	for (sx = 0; src[sx] != 0; sx++) {
+	if (!n)
+		return;
+	for (sx = 0, dx = 0; src[sx]; sx++) {
 		if (src[sx] == '"')
 			continue;
-		dst[dx++] = src[sx];
-		if (dx >= n)
+		dst[dx] = src[sx];
+		if (dx + 1 == n)
 			break;
+		dx++;
 	}
+	dst[dx] = '\0';
 }
 
 static int __init vmcmd_on_reboot_setup(char *str)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 1;
-	strncpy_skip_quote(vmcmd_on_reboot, str, 127);
-	vmcmd_on_reboot[127] = 0;
+	strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot));
 	on_reboot_trigger.action = &vmcmd_action;
 	return 1;
 }
@@ -1999,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup);
 
 static int __init vmcmd_on_panic_setup(char *str)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 1;
-	strncpy_skip_quote(vmcmd_on_panic, str, 127);
-	vmcmd_on_panic[127] = 0;
+	strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic));
 	on_panic_trigger.action = &vmcmd_action;
 	return 1;
 }
@@ -2010,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup);
 
 static int __init vmcmd_on_halt_setup(char *str)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 1;
-	strncpy_skip_quote(vmcmd_on_halt, str, 127);
-	vmcmd_on_halt[127] = 0;
+	strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt));
 	on_halt_trigger.action = &vmcmd_action;
 	return 1;
 }
@@ -2021,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup);
 
 static int __init vmcmd_on_poff_setup(char *str)
 {
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return 1;
-	strncpy_skip_quote(vmcmd_on_poff, str, 127);
-	vmcmd_on_poff[127] = 0;
+	strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff));
 	on_poff_trigger.action = &vmcmd_action;
 	return 1;
 }
@@ -2052,6 +2328,11 @@ void __init setup_ipl(void)
 		ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid;
 		ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno;
 		break;
+	case IPL_TYPE_ECKD:
+	case IPL_TYPE_ECKD_DUMP:
+		ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid;
+		ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno;
+		break;
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
 		ipl_info.data.fcp.dev_id.ssid = 0;
@@ -2078,7 +2359,7 @@ void s390_reset_system(void)
 	set_prefix(0);
 
 	/* Disable lowcore protection */
-	__ctl_clear_bit(0, 28);
+	local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
 	diag_amode31_ops.diag308_reset();
 }
 
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 45393919fe61..bdf9c7cb5685 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
@@ -25,10 +26,13 @@
 #include <asm/irq_regs.h>
 #include <asm/cputime.h>
 #include <asm/lowcore.h>
+#include <asm/machine.h>
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 #include <asm/stacktrace.h>
 #include <asm/softirq_stack.h>
+#include <asm/vtime.h>
+#include <asm/asm.h>
 #include "entry.h"
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -75,13 +79,13 @@ static const struct irq_class irqclass_sub_desc[] = {
 	{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
 	{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
 	{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+	{.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
 	{.irq = IRQIO_CIO,  .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
 	{.irq = IRQIO_DAS,  .name = "DAS", .desc = "[I/O] DASD"},
 	{.irq = IRQIO_C15,  .name = "C15", .desc = "[I/O] 3215"},
 	{.irq = IRQIO_C70,  .name = "C70", .desc = "[I/O] 3270"},
 	{.irq = IRQIO_TAP,  .name = "TAP", .desc = "[I/O] Tape"},
 	{.irq = IRQIO_VMR,  .name = "VMR", .desc = "[I/O] Unit Record Devices"},
-	{.irq = IRQIO_LCS,  .name = "LCS", .desc = "[I/O] LCS"},
 	{.irq = IRQIO_CTC,  .name = "CTC", .desc = "[I/O] CTC"},
 	{.irq = IRQIO_ADM,  .name = "ADM", .desc = "[I/O] EADM Subchannel"},
 	{.irq = IRQIO_CSC,  .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
@@ -99,8 +103,8 @@ static const struct irq_class irqclass_sub_desc[] = {
 
 static void do_IRQ(struct pt_regs *regs, int irq)
 {
-	if (tod_after_eq(S390_lowcore.int_clock,
-			 S390_lowcore.clock_comparator))
+	if (tod_after_eq(get_lowcore()->int_clock,
+			 get_lowcore()->clock_comparator))
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
 	generic_handle_irq(irq);
@@ -110,7 +114,7 @@ static int on_async_stack(void)
 {
 	unsigned long frame = current_frame_address();
 
-	return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+	return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
 }
 
 static void do_irq_async(struct pt_regs *regs, int irq)
@@ -118,7 +122,7 @@ static void do_irq_async(struct pt_regs *regs, int irq)
 	if (on_async_stack()) {
 		do_IRQ(regs, irq);
 	} else {
-		call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+		call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ,
 			      struct pt_regs *, regs, int, irq);
 	}
 }
@@ -127,36 +131,41 @@ static int irq_pending(struct pt_regs *regs)
 {
 	int cc;
 
-	asm volatile("tpi 0\n"
-		     "ipm %0" : "=d" (cc) : : "cc");
-	return cc >> 28;
+	asm volatile(
+		"	tpi	 0\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		:
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
 }
 
 void noinstr do_io_irq(struct pt_regs *regs)
 {
 	irqentry_state_t state = irqentry_enter(regs);
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	int from_idle;
+	bool from_idle;
 
 	irq_enter_rcu();
 
 	if (user_mode(regs)) {
 		update_timer_sys();
-		if (static_branch_likely(&cpu_has_bear))
+		if (cpu_has_bear())
 			current->thread.last_break = regs->last_break;
 	}
 
-	from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+	from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
 	if (from_idle)
 		account_idle_time_irq();
 
+	set_cpu_flag(CIF_NOHZ_DELAY);
 	do {
-		regs->tpi_info = S390_lowcore.tpi_info;
-		if (S390_lowcore.tpi_info.adapter_IO)
+		regs->tpi_info = get_lowcore()->tpi_info;
+		if (get_lowcore()->tpi_info.adapter_IO)
 			do_irq_async(regs, THIN_INTERRUPT);
 		else
 			do_irq_async(regs, IO_INTERRUPT);
-	} while (MACHINE_IS_LPAR && irq_pending(regs));
+	} while (machine_is_lpar() && irq_pending(regs));
 
 	irq_exit_rcu();
 
@@ -171,21 +180,21 @@ void noinstr do_ext_irq(struct pt_regs *regs)
 {
 	irqentry_state_t state = irqentry_enter(regs);
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	int from_idle;
+	bool from_idle;
 
 	irq_enter_rcu();
 
 	if (user_mode(regs)) {
 		update_timer_sys();
-		if (static_branch_likely(&cpu_has_bear))
+		if (cpu_has_bear())
 			current->thread.last_break = regs->last_break;
 	}
 
-	regs->int_code = S390_lowcore.ext_int_code_addr;
-	regs->int_parm = S390_lowcore.ext_params;
-	regs->int_parm_long = S390_lowcore.ext_params2;
+	regs->int_code = get_lowcore()->ext_int_code_addr;
+	regs->int_parm = get_lowcore()->ext_params;
+	regs->int_parm_long = get_lowcore()->ext_params2;
 
-	from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+	from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
 	if (from_idle)
 		account_idle_time_irq();
 
@@ -250,7 +259,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 		goto out;
 	}
-	if (index < nr_irqs) {
+	if (index < irq_get_nr_irqs()) {
 		show_msi_interrupt(p, index);
 		goto out;
 	}
@@ -385,7 +394,7 @@ void irq_subclass_register(enum irq_subclass subclass)
 {
 	spin_lock(&irq_subclass_lock);
 	if (!irq_subclass_refcount[subclass])
-		ctl_set_bit(0, subclass);
+		system_ctl_set_bit(0, subclass);
 	irq_subclass_refcount[subclass]++;
 	spin_unlock(&irq_subclass_lock);
 }
@@ -396,7 +405,7 @@ void irq_subclass_unregister(enum irq_subclass subclass)
 	spin_lock(&irq_subclass_lock);
 	irq_subclass_refcount[subclass]--;
 	if (!irq_subclass_refcount[subclass])
-		ctl_clear_bit(0, subclass);
+		system_ctl_clear_bit(0, subclass);
 	spin_unlock(&irq_subclass_lock);
 }
 EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 9da6fa30c447..4d364de43799 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -40,8 +40,10 @@ static int kexec_file_add_kernel_elf(struct kimage *image,
 		buf.bufsz = phdr->p_filesz;
 
 		buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+#ifdef CONFIG_CRASH_DUMP
 		if (image->type == KEXEC_TYPE_CRASH)
 			buf.mem += crashk_res.start;
+#endif
 		buf.memsz = phdr->p_memsz;
 		data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
 
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index af23eff5774d..a32ce8bea745 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -24,8 +24,10 @@ static int kexec_file_add_kernel_image(struct kimage *image,
 	buf.bufsz = image->kernel_buf_len;
 
 	buf.mem = 0;
+#ifdef CONFIG_CRASH_DUMP
 	if (image->type == KEXEC_TYPE_CRASH)
 		buf.mem += crashk_res.start;
+#endif
 	buf.memsz = buf.bufsz;
 
 	data->kernel_buf = image->kernel_buf;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 0032bdbe8e3f..c450120b4474 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,11 +9,11 @@
 
 #define pr_fmt(fmt) "kprobes: " fmt
 
-#include <linux/moduleloader.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
 #include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
 #include <linux/extable.h>
@@ -21,6 +21,8 @@
 #include <linux/slab.h>
 #include <linux/hardirq.h>
 #include <linux/ftrace.h>
+#include <linux/execmem.h>
+#include <asm/text-patching.h>
 #include <asm/set_memory.h>
 #include <asm/sections.h>
 #include <asm/dis.h>
@@ -31,41 +33,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = { };
 
-DEFINE_INSN_CACHE_OPS(s390_insn);
-
-static int insn_page_in_use;
-
 void *alloc_insn_page(void)
 {
 	void *page;
 
-	page = module_alloc(PAGE_SIZE);
+	page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
 	if (!page)
 		return NULL;
-	__set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X);
+	set_memory_rox((unsigned long)page, 1);
 	return page;
 }
 
-static void *alloc_s390_insn_page(void)
-{
-	if (xchg(&insn_page_in_use, 1) == 1)
-		return NULL;
-	return &kprobes_insn_page;
-}
-
-static void free_s390_insn_page(void *page)
-{
-	xchg(&insn_page_in_use, 0);
-}
-
-struct kprobe_insn_cache kprobe_s390_insn_slots = {
-	.mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex),
-	.alloc = alloc_s390_insn_page,
-	.free = free_s390_insn_page,
-	.pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages),
-	.insn_size = MAX_INSN_SIZE,
-};
-
 static void copy_instruction(struct kprobe *p)
 {
 	kprobe_opcode_t insn[MAX_INSN_SIZE];
@@ -79,10 +57,10 @@ static void copy_instruction(struct kprobe *p)
 	if (probe_is_insn_relative_long(&insn[0])) {
 		/*
 		 * For pc-relative instructions in RIL-b or RIL-c format patch
-		 * the RI2 displacement field. We have already made sure that
-		 * the insn slot for the patched instruction is within the same
-		 * 2GB area as the original instruction (either kernel image or
-		 * module area). Therefore the new displacement will always fit.
+		 * the RI2 displacement field. The insn slot for the
+		 * instruction to be patched is within the same 4GB area as
+		 * the original instruction. Therefore the new displacement
+		 * will always fit.
 		 */
 		disp = *(s32 *)&insn[1];
 		addr = (u64)(unsigned long)p->addr;
@@ -94,34 +72,6 @@ static void copy_instruction(struct kprobe *p)
 }
 NOKPROBE_SYMBOL(copy_instruction);
 
-static int s390_get_insn_slot(struct kprobe *p)
-{
-	/*
-	 * Get an insn slot that is within the same 2GB area like the original
-	 * instruction. That way instructions with a 32bit signed displacement
-	 * field can be patched and executed within the insn slot.
-	 */
-	p->ainsn.insn = NULL;
-	if (is_kernel((unsigned long)p->addr))
-		p->ainsn.insn = get_s390_insn_slot();
-	else if (is_module_addr(p->addr))
-		p->ainsn.insn = get_insn_slot();
-	return p->ainsn.insn ? 0 : -ENOMEM;
-}
-NOKPROBE_SYMBOL(s390_get_insn_slot);
-
-static void s390_free_insn_slot(struct kprobe *p)
-{
-	if (!p->ainsn.insn)
-		return;
-	if (is_kernel((unsigned long)p->addr))
-		free_s390_insn_slot(p->ainsn.insn, 0);
-	else
-		free_insn_slot(p->ainsn.insn, 0);
-	p->ainsn.insn = NULL;
-}
-NOKPROBE_SYMBOL(s390_free_insn_slot);
-
 /* Check if paddr is at an instruction boundary */
 static bool can_probe(unsigned long paddr)
 {
@@ -175,7 +125,8 @@ int arch_prepare_kprobe(struct kprobe *p)
 	/* Make sure the probe isn't going on a difficult instruction */
 	if (probe_is_prohibited_opcode(p->addr))
 		return -EINVAL;
-	if (s390_get_insn_slot(p))
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn)
 		return -ENOMEM;
 	copy_instruction(p);
 	return 0;
@@ -203,7 +154,12 @@ void arch_arm_kprobe(struct kprobe *p)
 {
 	struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
 
-	stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	if (cpu_has_seq_insn()) {
+		swap_instruction(&args);
+		text_poke_sync();
+	} else {
+		stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	}
 }
 NOKPROBE_SYMBOL(arch_arm_kprobe);
 
@@ -211,13 +167,21 @@ void arch_disarm_kprobe(struct kprobe *p)
 {
 	struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
 
-	stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	if (cpu_has_seq_insn()) {
+		swap_instruction(&args);
+		text_poke_sync();
+	} else {
+		stop_machine_cpuslocked(swap_instruction, &args, NULL);
+	}
 }
 NOKPROBE_SYMBOL(arch_disarm_kprobe);
 
 void arch_remove_kprobe(struct kprobe *p)
 {
-	s390_free_insn_slot(p);
+	if (!p->ainsn.insn)
+		return;
+	free_insn_slot(p->ainsn.insn, 0);
+	p->ainsn.insn = NULL;
 }
 NOKPROBE_SYMBOL(arch_remove_kprobe);
 
@@ -225,20 +189,27 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
 			      struct pt_regs *regs,
 			      unsigned long ip)
 {
-	struct per_regs per_kprobe;
+	union {
+		struct ctlreg regs[3];
+		struct {
+			struct ctlreg control;
+			struct ctlreg start;
+			struct ctlreg end;
+		};
+	} per_kprobe;
 
 	/* Set up the PER control registers %cr9-%cr11 */
-	per_kprobe.control = PER_EVENT_IFETCH;
-	per_kprobe.start = ip;
-	per_kprobe.end = ip;
+	per_kprobe.control.val = PER_EVENT_IFETCH;
+	per_kprobe.start.val = ip;
+	per_kprobe.end.val = ip;
 
 	/* Save control regs and psw mask */
-	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+	__local_ctl_store(9, 11, kcb->kprobe_saved_ctl);
 	kcb->kprobe_saved_imask = regs->psw.mask &
 		(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
 
 	/* Set PER control regs, turns on single step for the given address */
-	__ctl_load(per_kprobe, 9, 11);
+	__local_ctl_load(9, 11, per_kprobe.regs);
 	regs->psw.mask |= PSW_MASK_PER;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 	regs->psw.addr = ip;
@@ -250,7 +221,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
 			       unsigned long ip)
 {
 	/* Restore control regs and psw mask, set new psw address */
-	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+	__local_ctl_load(9, 11, kcb->kprobe_saved_ctl);
 	regs->psw.mask &= ~PSW_MASK_PER;
 	regs->psw.mask |= kcb->kprobe_saved_imask;
 	regs->psw.addr = ip;
@@ -279,19 +250,10 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb)
 {
 	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
 	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->prev_kprobe.kp = NULL;
 }
 NOKPROBE_SYMBOL(pop_kprobe);
 
-void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
-{
-	ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14];
-	ri->fp = (void *)regs->gprs[15];
-
-	/* Replace the return addr with trampoline addr */
-	regs->gprs[14] = (unsigned long)&__kretprobe_trampoline;
-}
-NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-
 static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
 {
 	switch (kcb->kprobe_status) {
@@ -372,26 +334,6 @@ static int kprobe_handler(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(kprobe_handler);
 
-void arch_kretprobe_fixup_return(struct pt_regs *regs,
-				 kprobe_opcode_t *correct_ret_addr)
-{
-	/* Replace fake return address with real one. */
-	regs->gprs[14] = (unsigned long)correct_ret_addr;
-}
-NOKPROBE_SYMBOL(arch_kretprobe_fixup_return);
-
-/*
- * Called from __kretprobe_trampoline
- */
-void trampoline_probe_handler(struct pt_regs *regs)
-{
-	kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]);
-}
-NOKPROBE_SYMBOL(trampoline_probe_handler);
-
-/* assembler function that handles the kretprobes must not be probed itself */
-NOKPROBE_SYMBOL(__kretprobe_trampoline);
-
 /*
  * Called after single-stepping.  p->addr is the address of the
  * instruction whose first byte has been replaced by the "breakpoint"
@@ -433,12 +375,11 @@ static int post_kprobe_handler(struct pt_regs *regs)
 	if (!p)
 		return 0;
 
+	resume_execution(p, regs);
 	if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
 		kcb->kprobe_status = KPROBE_HIT_SSDONE;
 		p->post_handler(p, regs, 0);
 	}
-
-	resume_execution(p, regs);
 	pop_kprobe(kcb);
 	preempt_enable_no_resched();
 
@@ -549,6 +490,12 @@ int __init arch_init_kprobes(void)
 	return 0;
 }
 
+int __init arch_populate_kprobe_blacklist(void)
+{
+	return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+					 (unsigned long)__irqentry_text_end);
+}
+
 int arch_trampoline_kprobe(struct kprobe *p)
 {
 	return 0;
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
deleted file mode 100644
index f6cb022ef8c8..000000000000
--- a/arch/s390/kernel/kprobes_insn_page.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <linux/linkage.h>
-
-/*
- * insn_page is a special 4k aligned dummy function for kprobes.
- * It will contain all kprobed instructions that are out-of-line executed.
- * The page must be within the kernel image to guarantee that the
- * out-of-line instructions are within 2GB distance of their original
- * location. Using a dummy function ensures that the insn_page is within
- * the text section of the kernel and mapped read-only/executable from
- * the beginning on, thus avoiding to split large mappings if the page
- * would be in the data section instead.
- */
-	.section .kprobes.text, "ax"
-	.align 4096
-ENTRY(kprobes_insn_page)
-	.rept 2048
-	.word 0x07fe
-	.endr
-ENDPROC(kprobes_insn_page)
-	.previous
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6652e54cf3db..6d1ffca5f798 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -166,7 +166,7 @@ static struct timer_list lgr_timer;
  */
 static void lgr_timer_set(void)
 {
-	mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
+	mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS));
 }
 
 /*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ab761c008f98..baeb3dcfc1c8 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,7 +13,12 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/debug_locks.h>
+#include <linux/cpufeature.h>
+#include <asm/guarded_storage.h>
+#include <asm/machine.h>
+#include <asm/pfault.h>
 #include <asm/cio.h>
+#include <asm/fpu.h>
 #include <asm/setup.h>
 #include <asm/smp.h>
 #include <asm/ipl.h>
@@ -21,15 +26,15 @@
 #include <asm/elf.h>
 #include <asm/asm-offsets.h>
 #include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
 #include <asm/os_info.h>
 #include <asm/set_memory.h>
 #include <asm/stacktrace.h>
-#include <asm/switch_to.h>
 #include <asm/nmi.h>
 #include <asm/sclp.h>
 
-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long,
-				  unsigned long);
+typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);
+typedef int (*purgatory_t)(int);
 
 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
@@ -40,13 +45,16 @@ extern const unsigned long long relocate_kernel_len;
  * Reset the system, copy boot CPU registers to absolute zero,
  * and jump to the kdump image
  */
-static void __do_machine_kdump(void *image)
+static void __do_machine_kdump(void *data)
 {
-	int (*start_kdump)(int);
+	struct kimage *image = data;
+	purgatory_t purgatory;
 	unsigned long prefix;
 
+	purgatory = (purgatory_t)image->start;
+
 	/* store_status() saved the prefix register to lowcore */
-	prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+	prefix = (unsigned long)get_lowcore()->prefixreg_save_area;
 
 	/* Now do the reset  */
 	s390_reset_system();
@@ -56,14 +64,12 @@ static void __do_machine_kdump(void *image)
 	 * This need to be done *after* s390_reset_system set the
 	 * prefix register of this CPU to zero
 	 */
-	memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
-	       (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
+	memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
+	       phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
 
-	__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
-	start_kdump = (void *)((struct kimage *) image)->start;
-	start_kdump(1);
+	call_nodat(1, int, purgatory, int, 1);
 
-	/* Die if start_kdump returns */
+	/* Die if kdump returns */
 	disabled_wait();
 }
 
@@ -87,16 +93,16 @@ static noinline void __machine_kdump(void *image)
 			continue;
 	}
 	/* Store status of the boot CPU */
-	mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
-	if (MACHINE_HAS_VX)
+	mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
+	if (cpu_has_vx())
 		save_vx_regs((__vector128 *) mcesa->vector_save_area);
-	if (MACHINE_HAS_GS) {
-		__ctl_store(cr2_old.val, 2, 2);
+	if (cpu_has_gs()) {
+		local_ctl_store(2, &cr2_old.reg);
 		cr2_new = cr2_old;
 		cr2_new.gse = 1;
-		__ctl_load(cr2_new.val, 2, 2);
+		local_ctl_load(2, &cr2_new.reg);
 		save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
-		__ctl_load(cr2_old.val, 2, 2);
+		local_ctl_load(2, &cr2_old.reg);
 	}
 	/*
 	 * To create a good backchain for this CPU in the dump store_status
@@ -110,18 +116,6 @@ static noinline void __machine_kdump(void *image)
 	store_status(__do_machine_kdump, image);
 }
 
-static unsigned long do_start_kdump(unsigned long addr)
-{
-	struct kimage *image = (struct kimage *) addr;
-	int (*start_kdump)(int) = (void *)image->start;
-	int rc;
-
-	__arch_local_irq_stnsm(0xfb); /* disable DAT */
-	rc = start_kdump(0);
-	__arch_local_irq_stosm(0x04); /* enable DAT */
-	return rc;
-}
-
 #endif /* CONFIG_CRASH_DUMP */
 
 /*
@@ -130,12 +124,10 @@ static unsigned long do_start_kdump(unsigned long addr)
 static bool kdump_csum_valid(struct kimage *image)
 {
 #ifdef CONFIG_CRASH_DUMP
+	purgatory_t purgatory = (purgatory_t)image->start;
 	int rc;
 
-	preempt_disable();
-	rc = call_on_stack(1, S390_lowcore.nodat_stack, unsigned long, do_start_kdump,
-			   unsigned long, (unsigned long)image);
-	preempt_enable();
+	rc = call_nodat(1, int, purgatory, int, 0);
 	return rc == 0;
 #else
 	return false;
@@ -188,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void)
 static int machine_kexec_prepare_kdump(void)
 {
 #ifdef CONFIG_CRASH_DUMP
-	if (MACHINE_IS_VM)
+	if (machine_is_vm())
 		diag10_range(PFN_DOWN(crashk_res.start),
 			     PFN_DOWN(crashk_res.end - crashk_res.start + 1));
 	return 0;
@@ -209,7 +201,7 @@ int machine_kexec_prepare(struct kimage *image)
 		return -EINVAL;
 
 	/* Get the destination where the assembler code should be copied to.*/
-	reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+	reboot_code_buffer = page_to_virt(image->control_code_page);
 
 	/* Then copy it */
 	memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
@@ -220,17 +212,6 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
-void arch_crash_save_vmcoreinfo(void)
-{
-	VMCOREINFO_SYMBOL(lowcore_ptr);
-	VMCOREINFO_SYMBOL(high_memory);
-	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-	vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
-	vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
-	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
-	put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
-}
-
 void machine_shutdown(void)
 {
 }
@@ -245,19 +226,20 @@ void machine_crash_shutdown(struct pt_regs *regs)
  */
 static void __do_machine_kexec(void *data)
 {
-	unsigned long diag308_subcode;
-	relocate_kernel_t data_mover;
+	unsigned long data_mover, entry, diag308_subcode;
 	struct kimage *image = data;
 
-	s390_reset_system();
-	data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
-
-	__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
-	/* Call the moving routine */
+	data_mover = page_to_phys(image->control_code_page);
+	entry = virt_to_phys(&image->head);
 	diag308_subcode = DIAG308_CLEAR_RESET;
 	if (sclp.has_iplcc)
 		diag308_subcode |= DIAG308_FLAG_EI;
-	(*data_mover)(&image->head, image->start, diag308_subcode);
+	s390_reset_system();
+
+	call_nodat(3, void, (relocate_kernel_t)data_mover,
+		   unsigned long, entry,
+		   unsigned long, image->start,
+		   unsigned long, diag308_subcode);
 
 	/* Die if kexec returns */
 	disabled_wait();
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fc6d5f58debe..c2bac14dd668 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -105,6 +105,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_CRASH_DUMP
 	if (image->type == KEXEC_TYPE_CRASH) {
 		u64 crash_size;
 
@@ -121,6 +122,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
 						     sizeof(crash_size),
 						     false);
 	}
+#endif
 	return ret;
 }
 
@@ -134,8 +136,10 @@ static int kexec_file_add_purgatory(struct kimage *image,
 
 	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
 	buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
 	if (image->type == KEXEC_TYPE_CRASH)
 		buf.mem += crashk_res.start;
+#endif
 
 	ret = kexec_load_purgatory(image, &buf);
 	if (ret)
@@ -158,8 +162,10 @@ static int kexec_file_add_initrd(struct kimage *image,
 
 	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
 	buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
 	if (image->type == KEXEC_TYPE_CRASH)
 		buf.mem += crashk_res.start;
+#endif
 	buf.memsz = buf.bufsz;
 
 	data->parm->initrd_start = data->memsz;
@@ -187,10 +193,8 @@ static int kexec_file_add_ipl_report(struct kimage *image,
 
 	data->memsz = ALIGN(data->memsz, PAGE_SIZE);
 	buf.mem = data->memsz;
-	if (image->type == KEXEC_TYPE_CRASH)
-		buf.mem += crashk_res.start;
 
-	ptr = (void *)ipl_cert_list_addr;
+	ptr = __va(ipl_cert_list_addr);
 	end = ptr + ipl_cert_list_size;
 	ncerts = 0;
 	while (ptr < end) {
@@ -202,7 +206,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
 
 	addr = data->memsz + data->report->size;
 	addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
-	ptr = (void *)ipl_cert_list_addr;
+	ptr = __va(ipl_cert_list_addr);
 	while (ptr < end) {
 		len = *(unsigned int *)ptr;
 		ptr += sizeof(len);
@@ -225,6 +229,11 @@ static int kexec_file_add_ipl_report(struct kimage *image,
 		data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
 	*lc_ipl_parmblock_ptr = (__u32)buf.mem;
 
+#ifdef CONFIG_CRASH_DUMP
+	if (image->type == KEXEC_TYPE_CRASH)
+		buf.mem += crashk_res.start;
+#endif
+
 	ret = kexec_add_buffer(&buf);
 out:
 	return ret;
@@ -267,10 +276,12 @@ void *kexec_file_add_components(struct kimage *image,
 	memcpy(data.parm->command_line, image->cmdline_buf,
 	       image->cmdline_buf_len);
 
+#ifdef CONFIG_CRASH_DUMP
 	if (image->type == KEXEC_TYPE_CRASH) {
 		data.parm->oldmem_base = crashk_res.start;
 		data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
 	}
+#endif
 
 	if (image->initrd_buf) {
 		ret = kexec_file_add_initrd(image, &data);
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4786bfe02144..1fec370fecf4 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,15 +9,21 @@
 #include <asm/ftrace.h>
 #include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
+#include <asm/march.h>
 
+#define STACK_FRAME_SIZE_PTREGS		(STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS			(STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS		(STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW		(STACK_PTREGS + __PT_PSW)
+
+#define STACK_FRAME_SIZE_FREGS		(STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE)
+#define STACK_FREGS			(STACK_FRAME_OVERHEAD)
+#define STACK_FREGS_PTREGS		(STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS)
+#define STACK_FREGS_PTREGS_GPRS		(STACK_FREGS_PTREGS + __PT_GPRS)
+#define STACK_FREGS_PTREGS_PSW		(STACK_FREGS_PTREGS + __PT_PSW)
+#define STACK_FREGS_PTREGS_ORIG_GPR2	(STACK_FREGS_PTREGS + __PT_ORIG_GPR2)
+#define STACK_FREGS_PTREGS_FLAGS	(STACK_FREGS_PTREGS + __PT_FLAGS)
 
-#define STACK_FRAME_SIZE	(STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PTREGS		(STACK_FRAME_OVERHEAD)
-#define STACK_PTREGS_GPRS	(STACK_PTREGS + __PT_GPRS)
-#define STACK_PTREGS_PSW	(STACK_PTREGS + __PT_PSW)
-#define STACK_PTREGS_ORIG_GPR2	(STACK_PTREGS + __PT_ORIG_GPR2)
-#define STACK_PTREGS_FLAGS	(STACK_PTREGS + __PT_FLAGS)
 /* packed stack: allocate just enough for r14, r15 and backchain */
 #define TRACED_FUNC_FRAME_SIZE	24
 
@@ -28,9 +34,14 @@
 
 	.section .kprobes.text, "ax"
 
-ENTRY(ftrace_stub)
+SYM_FUNC_START(ftrace_stub)
 	BR_EX	%r14
-ENDPROC(ftrace_stub)
+SYM_FUNC_END(ftrace_stub)
+
+SYM_CODE_START(ftrace_stub_direct_tramp)
+	lgr	%r1, %r0		# %r1 = %r0, the target of the branch below
+	BR_EX	%r1
+SYM_CODE_END(ftrace_stub_direct_tramp)
 
 	.macro	ftrace_regs_entry, allregs=0
 	stg	%r14,(__SF_GPRS+8*8)(%r15)	# save traced function caller
@@ -48,23 +59,23 @@ ENDPROC(ftrace_stub)
 	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r0,(__SF_GPRS+8*8)(%r15)
 	stg	%r15,(__SF_GPRS+9*8)(%r15)
-	# allocate pt_regs and stack frame for ftrace_trace_function
-	aghi	%r15,-STACK_FRAME_SIZE
-	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
-	xc	STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15)
+	# allocate ftrace_regs and stack frame for ftrace_trace_function
+	aghi	%r15,-STACK_FRAME_SIZE_FREGS
+	stg	%r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+	xc	STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 
 	.if \allregs == 1
-	stg	%r14,(STACK_PTREGS_PSW)(%r15)
-	mvghi	STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+	stg	%r14,(STACK_FREGS_PTREGS_PSW)(%r15)
+	mvghi	STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
 	.else
-	xc	STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15)
+	xc	STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15)
 	.endif
 
 	lg	%r14,(__SF_GPRS+8*8)(%r1)	# restore original return address
 	aghi	%r1,-TRACED_FUNC_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
-	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)
-	stmg	%r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+	stg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+	stmg	%r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
 	.endm
 
 SYM_CODE_START(ftrace_regs_caller)
@@ -78,7 +89,7 @@ SYM_CODE_START(ftrace_caller)
 SYM_CODE_END(ftrace_caller)
 
 SYM_CODE_START(ftrace_common)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
 	aghik	%r2,%r0,-MCOUNT_INSN_SIZE
 	lgrl	%r4,function_trace_op
 	lgrl	%r1,ftrace_func
@@ -91,30 +102,19 @@ SYM_CODE_START(ftrace_common)
 	lg	%r1,0(%r1)
 #endif
 	lgr	%r3,%r14
-	la	%r5,STACK_PTREGS(%r15)
+	la	%r5,STACK_FREGS(%r15)
 	BASR_EX	%r14,%r1
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# The j instruction gets runtime patched to a nop instruction.
-# See ftrace_enable_ftrace_graph_caller.
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
-	j	.Lftrace_graph_caller_end
-	lmg	%r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15)
-	lg	%r4,(STACK_PTREGS_PSW+8)(%r15)
-	brasl	%r14,prepare_ftrace_return
-	stg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)
-.Lftrace_graph_caller_end:
-#endif
-	lg	%r0,(STACK_PTREGS_PSW+8)(%r15)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-	ltg	%r1,STACK_PTREGS_ORIG_GPR2(%r15)
+	lg	%r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+#ifdef MARCH_HAS_Z196_FEATURES
+	ltg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 	locgrz	%r1,%r0
 #else
-	lg	%r1,STACK_PTREGS_ORIG_GPR2(%r15)
+	lg	%r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 	ltgr	%r1,%r1
 	jnz	0f
 	lgr	%r1,%r0
 #endif
-0:	lmg	%r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+0:	lmg	%r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
 	BR_EX	%r1
 SYM_CODE_END(ftrace_common)
 
@@ -123,10 +123,14 @@ SYM_CODE_END(ftrace_common)
 SYM_FUNC_START(return_to_handler)
 	stmg	%r2,%r5,32(%r15)
 	lgr	%r1,%r15
-	aghi	%r15,-STACK_FRAME_OVERHEAD
+	# allocate ftrace_regs and stack frame for ftrace_return_to_handler
+	aghi	%r15,-STACK_FRAME_SIZE_FREGS
 	stg	%r1,__SF_BACKCHAIN(%r15)
+	stg	%r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+	stg	%r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+	la	%r2,STACK_FRAME_OVERHEAD(%r15)
 	brasl	%r14,ftrace_return_to_handler
-	aghi	%r15,STACK_FRAME_OVERHEAD
+	aghi	%r15,STACK_FRAME_SIZE_FREGS
 	lgr	%r14,%r2
 	lmg	%r2,%r5,32(%r15)
 	BR_EX	%r14
@@ -135,16 +139,31 @@ SYM_FUNC_END(return_to_handler)
 #endif
 #endif /* CONFIG_FUNCTION_TRACER */
 
-#ifdef CONFIG_KPROBES
-
-SYM_FUNC_START(__kretprobe_trampoline)
-
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br)
+	lmg	%r0,%r1,2(%r1)		# load %r0 and %r1 from the 16 bytes at 2(%r1)
+	br	%r1			# branch to the address just loaded into %r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br)
+
+#ifdef CONFIG_EXPOLINE
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl)
+	lmg	%r0,%r1,2(%r1)		# load %r0 and %r1 from the 16 bytes at 2(%r1)
+	exrl	%r0,0f			# execute the "br %r1" at label 0 below
+	j	.			# jumps to itself; only reached if the exrl did not branch
+0:	br	%r1			# exrl target: branch to the address loaded into %r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)
+#endif /* CONFIG_EXPOLINE */
+
+#ifdef CONFIG_RETHOOK
+
+SYM_CODE_START(arch_rethook_trampoline)
 	stg	%r14,(__SF_GPRS+8*8)(%r15)
-	lay	%r15,-STACK_FRAME_SIZE(%r15)
+	lay	%r15,-STACK_FRAME_SIZE_PTREGS(%r15)
 	stmg	%r0,%r14,STACK_PTREGS_GPRS(%r15)
 
 	# store original stack pointer in backchain and pt_regs
-	lay	%r7,STACK_FRAME_SIZE(%r15)
+	lay	%r7,STACK_FRAME_SIZE_PTREGS(%r15)
 	stg	%r7,__SF_BACKCHAIN(%r15)
 	stg	%r7,STACK_PTREGS_GPRS+(15*8)(%r15)
 
@@ -152,16 +171,15 @@ SYM_FUNC_START(__kretprobe_trampoline)
 	epsw	%r2,%r3
 	risbg	%r3,%r2,0,31,32
 	stg	%r3,STACK_PTREGS_PSW(%r15)
-	larl	%r1,__kretprobe_trampoline
+	larl	%r1,arch_rethook_trampoline
 	stg	%r1,STACK_PTREGS_PSW+8(%r15)
 
 	lay	%r2,STACK_PTREGS(%r15)
-	brasl	%r14,trampoline_probe_handler
+	brasl	%r14,arch_rethook_trampoline_callback
 
 	mvc	__SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
 	lmg	%r0,%r15,STACK_PTREGS_GPRS(%r15)
 	lpswe	__SF_EMPTY(%r15)
+SYM_CODE_END(arch_rethook_trampoline)
 
-SYM_FUNC_END(__kretprobe_trampoline)
-
-#endif /* CONFIG_KPROBES */
+#endif /* CONFIG_RETHOOK */
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 2d159b32885b..91e207b50394 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,11 +21,13 @@
 #include <linux/moduleloader.h>
 #include <linux/bug.h>
 #include <linux/memory.h>
+#include <linux/execmem.h>
 #include <asm/alternative.h>
 #include <asm/nospec-branch.h>
 #include <asm/facility.h>
 #include <asm/ftrace.lds.h>
 #include <asm/set_memory.h>
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP printk
@@ -35,27 +37,10 @@
 
 #define PLT_ENTRY_SIZE 22
 
-void *module_alloc(unsigned long size)
-{
-	gfp_t gfp_mask = GFP_KERNEL;
-	void *p;
-
-	if (PAGE_ALIGN(size) > MODULES_LEN)
-		return NULL;
-	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
-				 gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
-				 __builtin_return_address(0));
-	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
-		vfree(p);
-		return NULL;
-	}
-	return p;
-}
-
 #ifdef CONFIG_FUNCTION_TRACER
 void module_arch_cleanup(struct module *mod)
 {
-	module_memfree(mod->arch.trampolines_start);
+	execmem_free(mod->arch.trampolines_start);
 }
 #endif
 
@@ -126,6 +111,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 	Elf_Rela *rela;
 	char *strings;
 	int nrela, i, j;
+	struct module_memory *mod_mem;
 
 	/* Find symbol table and string table. */
 	symtab = NULL;
@@ -173,14 +159,15 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 
 	/* Increase core size by size of got & plt and set start
 	   offsets for got and plt. */
-	me->core_layout.size = ALIGN(me->core_layout.size, 4);
-	me->arch.got_offset = me->core_layout.size;
-	me->core_layout.size += me->arch.got_size;
-	me->arch.plt_offset = me->core_layout.size;
+	mod_mem = &me->mem[MOD_TEXT];
+	mod_mem->size = ALIGN(mod_mem->size, 4);
+	me->arch.got_offset = mod_mem->size;
+	mod_mem->size += me->arch.got_size;
+	me->arch.plt_offset = mod_mem->size;
 	if (me->arch.plt_size) {
 		if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
 			me->arch.plt_size += PLT_ENTRY_SIZE;
-		me->core_layout.size += me->arch.plt_size;
+		mod_mem->size += me->arch.plt_size;
 	}
 	return 0;
 }
@@ -304,7 +291,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 	case R_390_GOTPLT64:	/* 64 bit offset to jump slot.	*/
 	case R_390_GOTPLTENT:	/* 32 bit rel. offset to jump slot >> 1. */
 		if (info->got_initialized == 0) {
-			Elf_Addr *gotent = me->core_layout.base +
+			Elf_Addr *gotent = me->mem[MOD_TEXT].base +
 					   me->arch.got_offset +
 					   info->got_offset;
 
@@ -329,7 +316,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
 		else if (r_type == R_390_GOTENT ||
 			 r_type == R_390_GOTPLTENT) {
-			val += (Elf_Addr) me->core_layout.base - loc;
+			val += (Elf_Addr)me->mem[MOD_TEXT].base +
+				me->arch.got_offset - loc;
 			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
 		}
 		break;
@@ -345,7 +333,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 			char *plt_base;
 			char *ip;
 
-			plt_base = me->core_layout.base + me->arch.plt_offset;
+			plt_base = me->mem[MOD_TEXT].base + me->arch.plt_offset;
 			ip = plt_base + info->plt_offset;
 			*(int *)insn = 0x0d10e310;	/* basr 1,0  */
 			*(int *)&insn[4] = 0x100c0004;	/* lg	1,12(1) */
@@ -375,7 +363,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 			       val - loc + 0xffffUL < 0x1ffffeUL) ||
 			      (r_type == R_390_PLT32DBL &&
 			       val - loc + 0xffffffffULL < 0x1fffffffeULL)))
-				val = (Elf_Addr) me->core_layout.base +
+				val = (Elf_Addr) me->mem[MOD_TEXT].base +
 					me->arch.plt_offset +
 					info->plt_offset;
 			val += rela->r_addend - loc;
@@ -397,7 +385,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 	case R_390_GOTOFF32:	/* 32 bit offset to GOT.  */
 	case R_390_GOTOFF64:	/* 64 bit offset to GOT. */
 		val = val + rela->r_addend -
-			((Elf_Addr) me->core_layout.base + me->arch.got_offset);
+			((Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset);
 		if (r_type == R_390_GOTOFF16)
 			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
 		else if (r_type == R_390_GOTOFF32)
@@ -407,7 +395,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 		break;
 	case R_390_GOTPC:	/* 32 bit PC relative offset to GOT. */
 	case R_390_GOTPCDBL:	/* 32 bit PC rel. off. to GOT shifted by 1. */
-		val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
+		val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset +
 			rela->r_addend - loc;
 		if (r_type == R_390_GOTPC)
 			rc = apply_rela_bits(loc, val, 1, 32, 0, write);
@@ -486,10 +474,10 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
 
 	size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
 	numpages = DIV_ROUND_UP(size, PAGE_SIZE);
-	start = module_alloc(numpages * PAGE_SIZE);
+	start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
 	if (!start)
 		return -ENOMEM;
-	set_memory_ro((unsigned long)start, numpages);
+	set_memory_rox((unsigned long)start, numpages);
 	end = start + size;
 
 	me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start;
@@ -515,7 +503,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 	    !nospec_disable && me->arch.plt_size) {
 		unsigned int *ij;
 
-		ij = me->core_layout.base + me->arch.plt_offset +
+		ij = me->mem[MOD_TEXT].base + me->arch.plt_offset +
 			me->arch.plt_size - PLT_ENTRY_SIZE;
 		ij[0] = 0xc6000000;	/* exrl	%r0,.+10	*/
 		ij[1] = 0x0005a7f4;	/* j	.		*/
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 53ed3884fe64..3da371c144eb 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -9,8 +9,10 @@
  */
 
 #include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/errno.h>
+#include <linux/entry-common.h>
 #include <linux/hardirq.h>
 #include <linux/log2.h>
 #include <linux/kprobes.h>
@@ -18,20 +20,19 @@
 #include <linux/time.h>
 #include <linux/module.h>
 #include <linux/sched/signal.h>
-
+#include <linux/kvm_host.h>
 #include <linux/export.h>
 #include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+#include <asm/fpu.h>
 #include <asm/smp.h>
 #include <asm/stp.h>
 #include <asm/cputime.h>
 #include <asm/nmi.h>
 #include <asm/crw.h>
-#include <asm/switch_to.h>
-#include <asm/ctl_reg.h>
 #include <asm/asm-offsets.h>
 #include <asm/pai.h>
-
-#include <linux/kvm_host.h>
+#include <asm/vtime.h>
 
 struct mcck_struct {
 	unsigned int kill_task : 1;
@@ -42,19 +43,10 @@ struct mcck_struct {
 };
 
 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
-static struct kmem_cache *mcesa_cache;
-static unsigned long mcesa_origin_lc;
 
 static inline int nmi_needs_mcesa(void)
 {
-	return MACHINE_HAS_VX || MACHINE_HAS_GS;
-}
-
-static inline unsigned long nmi_get_mcesa_size(void)
-{
-	if (MACHINE_HAS_GS)
-		return MCESA_MAX_SIZE;
-	return MCESA_MIN_SIZE;
+	return cpu_has_vx() || cpu_has_gs();
 }
 
 /*
@@ -63,47 +55,34 @@ static inline unsigned long nmi_get_mcesa_size(void)
  * structure. The structure is required for machine check happening
  * early in the boot process.
  */
-static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
+static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
 
 void __init nmi_alloc_mcesa_early(u64 *mcesad)
 {
 	if (!nmi_needs_mcesa())
 		return;
 	*mcesad = __pa(&boot_mcesa);
-	if (MACHINE_HAS_GS)
+	if (cpu_has_gs())
 		*mcesad |= ilog2(MCESA_MAX_SIZE);
 }
 
-static void __init nmi_alloc_cache(void)
+int nmi_alloc_mcesa(u64 *mcesad)
 {
 	unsigned long size;
-
-	if (!nmi_needs_mcesa())
-		return;
-	size = nmi_get_mcesa_size();
-	if (size > MCESA_MIN_SIZE)
-		mcesa_origin_lc = ilog2(size);
-	/* create slab cache for the machine-check-extended-save-areas */
-	mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
-	if (!mcesa_cache)
-		panic("Couldn't create nmi save area cache");
-}
-
-int __ref nmi_alloc_mcesa(u64 *mcesad)
-{
-	unsigned long origin;
+	void *origin;
 
 	*mcesad = 0;
 	if (!nmi_needs_mcesa())
 		return 0;
-	if (!mcesa_cache)
-		nmi_alloc_cache();
-	origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
+	size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
+	origin = kmalloc(size, GFP_KERNEL);
 	if (!origin)
 		return -ENOMEM;
 	/* The pointer is stored with mcesa_bits ORed in */
-	kmemleak_not_leak((void *) origin);
-	*mcesad = __pa(origin) | mcesa_origin_lc;
+	kmemleak_not_leak(origin);
+	*mcesad = __pa(origin);
+	if (cpu_has_gs())
+		*mcesad |= ilog2(MCESA_MAX_SIZE);
 	return 0;
 }
 
@@ -111,12 +90,65 @@ void nmi_free_mcesa(u64 *mcesad)
 {
 	if (!nmi_needs_mcesa())
 		return;
-	kmem_cache_free(mcesa_cache, __va(*mcesad & MCESA_ORIGIN_MASK));
+	kfree(__va(*mcesad & MCESA_ORIGIN_MASK));
+}
+
+static __always_inline char *nmi_puts(char *dest, const char *src)
+{	/* Copy the NUL-terminated string src to dest; dest's size is not checked. */
+	while (*src)
+		*dest++ = *src++;
+	*dest = 0;		/* terminate the copy; dest is not advanced past the NUL */
+	return dest;		/* points at the NUL, so more text can be appended here */
+}
+
+static __always_inline char *u64_to_hex(char *dest, u64 val)
+{	/* Write val to dest as exactly 16 upper-case hex digits followed by a NUL. */
+	int i, num;
+
+	for (i = 1; i <= 16; i++) {
+		num = (val >> (64 - 4 * i)) & 0xf;	/* i-th nibble, most significant first */
+		if (num >= 10)
+			*dest++ = 'A' + num - 10;
+		else
+			*dest++ = '0' + num;
+	}
+	*dest = 0;
+	return dest;		/* points at the terminating NUL */
+}
 
 static notrace void s390_handle_damage(void)
 {
+	struct lowcore *lc = get_lowcore();
+	union ctlreg0 cr0, cr0_new;
+	char message[100];	/* holds the 59-char prefix + 16 hex digits + NUL */
+	psw_t psw_save;
+	char *ptr;
+
 	smp_emergency_stop();
+	diag_amode31_ops.diag308_reset();
+	ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
+	u64_to_hex(ptr, lc->mcck_interruption_code);	/* append the code as 16 hex digits */
+
+	/*
+	 * Disable low address protection and make machine check new PSW a
+	 * disabled wait PSW. Any additional machine check cannot be handled.
+	 */
+	local_ctl_store(0, &cr0.reg);
+	cr0_new = cr0;
+	cr0_new.lap = 0;
+	local_ctl_load(0, &cr0_new.reg);
+	psw_save = lc->mcck_new_psw;	/* kept so the original PSW can be put back below */
+	psw_bits(lc->mcck_new_psw).io = 0;
+	psw_bits(lc->mcck_new_psw).ext = 0;
+	psw_bits(lc->mcck_new_psw).wait = 1;
+	sclp_emergency_printk(message);
+
+	/*
+	 * Restore machine check new PSW and control register 0 to original
+	 * values. This makes analysis of a possible system dump easier.
+	 */
+	lc->mcck_new_psw = psw_save;
+	local_ctl_load(0, &cr0.reg);
 	disabled_wait();
 	while (1);
 }
@@ -126,19 +158,20 @@ NOKPROBE_SYMBOL(s390_handle_damage);
  * Main machine check handler function. Will be called with interrupts disabled
  * and machine checks enabled.
  */
-void __s390_handle_mcck(void)
+void s390_handle_mcck(void)
 {
 	struct mcck_struct mcck;
+	unsigned long mflags;
 
 	/*
 	 * Disable machine checks and get the current state of accumulated
 	 * machine checks. Afterwards delete the old state and enable machine
 	 * checks again.
 	 */
-	local_mcck_disable();
+	local_mcck_save(mflags);
 	mcck = *this_cpu_ptr(&cpu_mcck);
 	memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
-	local_mcck_enable();
+	local_mcck_restore(mflags);
 
 	if (mcck.channel_report)
 		crw_handle_channel_report();
@@ -155,203 +188,80 @@ void __s390_handle_mcck(void)
 		static int mchchk_wng_posted = 0;
 
 		/* Use single cpu clear, as we cannot handle smp here. */
-		__ctl_clear_bit(14, 24);	/* Disable WARNING MCH */
+		local_ctl_clear_bit(14, CR14_WARNING_SUBMASK_BIT);
 		if (xchg(&mchchk_wng_posted, 1) == 0)
 			kill_cad_pid(SIGPWR, 1);
 	}
 	if (mcck.stp_queue)
 		stp_queue_work();
 	if (mcck.kill_task) {
-		local_irq_enable();
 		printk(KERN_EMERG "mcck: Terminating task because of machine "
 		       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
 		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
 		       current->comm, current->pid);
-		make_task_dead(SIGSEGV);
+		if (is_global_init(current))
+			panic("mcck: Attempting to kill init!\n");
+		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID);
 	}
 }
 
-void noinstr s390_handle_mcck(struct pt_regs *regs)
-{
-	trace_hardirqs_off();
-	pai_kernel_enter(regs);
-	__s390_handle_mcck();
-	pai_kernel_exit(regs);
-	trace_hardirqs_on();
-}
-/*
- * returns 0 if all required registers are available
- * returns 1 otherwise
+/**
+ * nmi_registers_valid - verify if registers are valid
+ * @mci: machine check interruption code
+ *
+ * Inspect a machine check interruption code and verify if all required
+ * registers are valid. For some registers the corresponding validity bit is
+ * ignored and the registers are set to the expected value.
+ * Returns true if all registers are valid, otherwise false.
  */
-static int notrace s390_validate_registers(union mci mci, int umode)
+static bool notrace nmi_registers_valid(union mci mci)
 {
-	struct mcesa *mcesa;
-	void *fpt_save_area;
 	union ctlreg2 cr2;
-	int kill_task;
-	u64 zero;
-
-	kill_task = 0;
-	zero = 0;
 
-	if (!mci.gr) {
-		/*
-		 * General purpose registers couldn't be restored and have
-		 * unknown contents. Stop system or terminate process.
-		 */
-		if (!umode)
-			s390_handle_damage();
-		kill_task = 1;
-	}
-	if (!mci.fp) {
-		/*
-		 * Floating point registers can't be restored. If the
-		 * kernel currently uses floating point registers the
-		 * system is stopped. If the process has its floating
-		 * pointer registers loaded it is terminated.
-		 */
-		if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
-			s390_handle_damage();
-		if (!test_cpu_flag(CIF_FPU))
-			kill_task = 1;
-	}
-	fpt_save_area = &S390_lowcore.floating_pt_save_area;
-	if (!mci.fc) {
-		/*
-		 * Floating point control register can't be restored.
-		 * If the kernel currently uses the floating pointer
-		 * registers and needs the FPC register the system is
-		 * stopped. If the process has its floating pointer
-		 * registers loaded it is terminated. Otherwise the
-		 * FPC is just validated.
-		 */
-		if (S390_lowcore.fpu_flags & KERNEL_FPC)
-			s390_handle_damage();
-		asm volatile(
-			"	lfpc	%0\n"
-			:
-			: "Q" (zero));
-		if (!test_cpu_flag(CIF_FPU))
-			kill_task = 1;
-	} else {
-		asm volatile(
-			"	lfpc	%0\n"
-			:
-			: "Q" (S390_lowcore.fpt_creg_save_area));
-	}
-
-	mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
-	if (!MACHINE_HAS_VX) {
-		/* Validate floating point registers */
-		asm volatile(
-			"	ld	0,0(%0)\n"
-			"	ld	1,8(%0)\n"
-			"	ld	2,16(%0)\n"
-			"	ld	3,24(%0)\n"
-			"	ld	4,32(%0)\n"
-			"	ld	5,40(%0)\n"
-			"	ld	6,48(%0)\n"
-			"	ld	7,56(%0)\n"
-			"	ld	8,64(%0)\n"
-			"	ld	9,72(%0)\n"
-			"	ld	10,80(%0)\n"
-			"	ld	11,88(%0)\n"
-			"	ld	12,96(%0)\n"
-			"	ld	13,104(%0)\n"
-			"	ld	14,112(%0)\n"
-			"	ld	15,120(%0)\n"
-			:
-			: "a" (fpt_save_area)
-			: "memory");
-	} else {
-		/* Validate vector registers */
-		union ctlreg0 cr0;
-
-		/*
-		 * The vector validity must only be checked if not running a
-		 * KVM guest. For KVM guests the machine check is forwarded by
-		 * KVM and it is the responsibility of the guest to take
-		 * appropriate actions. The host vector or FPU values have been
-		 * saved by KVM and will be restored by KVM.
-		 */
-		if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
-			/*
-			 * Vector registers can't be restored. If the kernel
-			 * currently uses vector registers the system is
-			 * stopped. If the process has its vector registers
-			 * loaded it is terminated. Otherwise just validate
-			 * the registers.
-			 */
-			if (S390_lowcore.fpu_flags & KERNEL_VXR)
-				s390_handle_damage();
-			if (!test_cpu_flag(CIF_FPU))
-				kill_task = 1;
-		}
-		cr0.val = S390_lowcore.cregs_save_area[0];
-		cr0.afp = cr0.vx = 1;
-		__ctl_load(cr0.val, 0, 0);
-		asm volatile(
-			"	la	1,%0\n"
-			"	.word	0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
-			"	.word	0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
-			:
-			: "Q" (*(struct vx_array *)mcesa->vector_save_area)
-			: "1");
-		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
-	}
-	/* Validate access registers */
-	asm volatile(
-		"	lam	0,15,0(%0)\n"
-		:
-		: "a" (&S390_lowcore.access_regs_save_area)
-		: "memory");
-	if (!mci.ar) {
-		/*
-		 * Access registers have unknown contents.
-		 * Terminating task.
-		 */
-		kill_task = 1;
-	}
-	/* Validate guarded storage registers */
-	cr2.val = S390_lowcore.cregs_save_area[2];
-	if (cr2.gse) {
-		if (!mci.gs) {
-			/*
-			 * 2 cases:
-			 * - machine check in kernel or userspace
-			 * - machine check while running SIE (KVM guest)
-			 * For kernel or userspace the userspace values of
-			 * guarded storage control can not be recreated, the
-			 * process must be terminated.
-			 * For SIE the guest values of guarded storage can not
-			 * be recreated. This is either due to a bug or due to
-			 * GS being disabled in the guest. The guest will be
-			 * notified by KVM code and the guests machine check
-			 * handling must take care of this.  The host values
-			 * are saved by KVM and are not affected.
-			 */
-			if (!test_cpu_flag(CIF_MCCK_GUEST))
-				kill_task = 1;
-		} else {
-			load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
-		}
-	}
 	/*
-	 * The getcpu vdso syscall reads CPU number from the programmable
+	 * The getcpu vdso syscall reads the CPU number from the programmable
 	 * field of the TOD clock. Disregard the TOD programmable register
-	 * validity bit and load the CPU number into the TOD programmable
-	 * field unconditionally.
+	 * validity bit and load the CPU number into the TOD programmable field
+	 * unconditionally.
 	 */
 	set_tod_programmable_field(raw_smp_processor_id());
-	/* Validate clock comparator register */
-	set_clock_comparator(S390_lowcore.clock_comparator);
-
+	/*
+	 * Set the clock comparator register to the next expected value.
+	 */
+	set_clock_comparator(get_lowcore()->clock_comparator);
+	if (!mci.gr || !mci.fp || !mci.fc)
+		return false;
+	/*
+	 * The vector validity must only be checked if not running a
+	 * KVM guest. For KVM guests the machine check is forwarded by
+	 * KVM and it is the responsibility of the guest to take
+	 * appropriate actions. The host vector or FPU values have been
+	 * saved by KVM and will be restored by KVM.
+	 */
+	if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
+		return false;
+	if (!mci.ar)
+		return false;
+	/*
+	 * Two cases for guarded storage registers:
+	 * - machine check in kernel or userspace
+	 * - machine check while running SIE (KVM guest)
+	 * For kernel or userspace the userspace values of guarded storage
+	 * control can not be recreated, the process must be terminated.
+	 * For SIE the guest values of guarded storage can not be recreated.
+	 * This is either due to a bug or due to GS being disabled in the
+	 * guest. The guest will be notified by KVM code and the guest's machine
+	 * check handling must take care of this. The host values are saved by
+	 * KVM and are not affected.
+	 */
+	cr2.reg = get_lowcore()->cregs_save_area[2];
+	if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
+		return false;
 	if (!mci.ms || !mci.pm || !mci.ia)
-		kill_task = 1;
-
-	return kill_task;
+		return false;
+	return true;
 }
-NOKPROBE_SYMBOL(s390_validate_registers);
+NOKPROBE_SYMBOL(nmi_registers_valid);
 
 /*
  * Backup the guest's machine check info to its description block
@@ -362,8 +272,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
 	struct sie_page *sie_page;
 
 	/* r14 contains the sie block, which was set in sie64a */
-	struct kvm_s390_sie_block *sie_block =
-			(struct kvm_s390_sie_block *) regs->gprs[14];
+	struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]);
 
 	if (sie_block == NULL)
 		/* Something's seriously wrong, stop system. */
@@ -371,11 +280,10 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
 
 	sie_page = container_of(sie_block, struct sie_page, sie_block);
 	mcck_backup = &sie_page->mcck_info;
-	mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+	mcck_backup->mcic = get_lowcore()->mcck_interruption_code &
 				~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
-	mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
-	mcck_backup->failing_storage_address
-			= S390_lowcore.failing_storage_address;
+	mcck_backup->ext_damage_code = get_lowcore()->external_damage_code;
+	mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address;
 }
 NOKPROBE_SYMBOL(s390_backup_mcck_info);
 
@@ -390,23 +298,25 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info);
 /*
  * machine check handler.
  */
-int notrace s390_do_machine_check(struct pt_regs *regs)
+void notrace s390_do_machine_check(struct pt_regs *regs)
 {
 	static int ipd_count;
 	static DEFINE_SPINLOCK(ipd_lock);
 	static unsigned long long last_ipd;
+	struct lowcore *lc = get_lowcore();
 	struct mcck_struct *mcck;
 	unsigned long long tmp;
+	irqentry_state_t irq_state;
 	union mci mci;
 	unsigned long mcck_dam_code;
 	int mcck_pending = 0;
 
-	nmi_enter();
+	irq_state = irqentry_nmi_enter(regs);
 
 	if (user_mode(regs))
 		update_timer_mcck();
 	inc_irq_stat(NMI_NMI);
-	mci.val = S390_lowcore.mcck_interruption_code;
+	mci.val = lc->mcck_interruption_code;
 	mcck = this_cpu_ptr(&cpu_mcck);
 
 	/*
@@ -449,7 +359,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
 			s390_handle_damage();
 		}
 	}
-	if (s390_validate_registers(mci, user_mode(regs))) {
+	if (!nmi_registers_valid(mci)) {
+		if (!user_mode(regs))
+			s390_handle_damage();
 		/*
 		 * Couldn't restore all register contents for the
 		 * user space process -> mark task for termination.
@@ -472,13 +384,27 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
 	}
 	if (mci.ed && mci.ec) {
 		/* External damage */
-		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+		if (lc->external_damage_code & (1U << ED_STP_SYNC))
 			mcck->stp_queue |= stp_sync_check();
-		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+		if (lc->external_damage_code & (1U << ED_STP_ISLAND))
 			mcck->stp_queue |= stp_island_check();
 		mcck_pending = 1;
 	}
-
+	/*
+	 * Reinject storage related machine checks into the guest if they
+	 * happen when the guest is running.
+	 */
+	if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+		/* Storage error uncorrected */
+		if (mci.se)
+			s390_handle_damage();
+		/* Storage key-error uncorrected */
+		if (mci.ke)
+			s390_handle_damage();
+		/* Storage degradation */
+		if (mci.ds && mci.fa)
+			s390_handle_damage();
+	}
 	if (mci.cp) {
 		/* Channel report word pending */
 		mcck->channel_report = 1;
@@ -503,24 +429,18 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
 	}
 	clear_cpu_flag(CIF_MCCK_GUEST);
 
-	if (user_mode(regs) && mcck_pending) {
-		nmi_exit();
-		return 1;
-	}
-
 	if (mcck_pending)
 		schedule_mcck_handler();
 
-	nmi_exit();
-	return 0;
+	irqentry_nmi_exit(regs, irq_state);
 }
 NOKPROBE_SYMBOL(s390_do_machine_check);
 
 static int __init machine_check_init(void)
 {
-	ctl_set_bit(14, 25);	/* enable external damage MCH */
-	ctl_set_bit(14, 27);	/* enable system recovery MCH */
-	ctl_set_bit(14, 24);	/* enable warning MCH */
+	system_ctl_set_bit(14, CR14_EXTERNAL_DAMAGE_SUBMASK_BIT);
+	system_ctl_set_bit(14, CR14_RECOVERY_SUBMASK_BIT);
+	system_ctl_set_bit(14, CR14_WARNING_SUBMASK_BIT);
 	return 0;
 }
 early_initcall(machine_check_init);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 717bbcc056e5..e11ec15960a1 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -4,6 +4,8 @@
 #include <linux/cpu.h>
 #include <asm/nospec-branch.h>
 
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
 static int __init nobp_setup_early(char *str)
 {
 	bool enabled;
@@ -14,14 +16,14 @@ static int __init nobp_setup_early(char *str)
 		return rc;
 	if (enabled && test_facility(82)) {
 		/*
-		 * The user explicitely requested nobp=1, enable it and
+		 * The user explicitly requested nobp=1, enable it and
 		 * disable the expoline support.
 		 */
-		__set_facility(82, alt_stfle_fac_list);
+		nobp = 1;
 		if (IS_ENABLED(CONFIG_EXPOLINE))
 			nospec_disable = 1;
 	} else {
-		__clear_facility(82, alt_stfle_fac_list);
+		nobp = 0;
 	}
 	return 0;
 }
@@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
 
 static int __init nospec_setup_early(char *str)
 {
-	__clear_facility(82, alt_stfle_fac_list);
+	nobp = 0;
 	return 0;
 }
 early_param("nospec", nospec_setup_early);
@@ -40,7 +42,7 @@ static int __init nospec_report(void)
 		pr_info("Spectre V2 mitigation: etokens\n");
 	if (nospec_uses_trampoline())
 		pr_info("Spectre V2 mitigation: execute trampolines\n");
-	if (__test_facility(82, alt_stfle_fac_list))
+	if (nobp_enabled())
 		pr_info("Spectre V2 mitigation: limited branch prediction\n");
 	return 0;
 }
@@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
 		 */
 		if (__is_defined(CC_USING_EXPOLINE))
 			nospec_disable = 1;
-		__clear_facility(82, alt_stfle_fac_list);
+		nobp = 0;
 	} else if (__is_defined(CC_USING_EXPOLINE)) {
 		/*
 		 * The kernel has been compiled with expolines.
 		 * Keep expolines enabled and disable nobp.
 		 */
 		nospec_disable = 0;
-		__clear_facility(82, alt_stfle_fac_list);
+		nobp = 0;
 	}
 	/*
 	 * If the kernel has not been compiled with expolines the
@@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
 {
 	if (str && !strncmp(str, "on", 2)) {
 		nospec_disable = 0;
-		__clear_facility(82, alt_stfle_fac_list);
+		nobp = 0;
 	}
 	if (str && !strncmp(str, "off", 3))
 		nospec_disable = 1;
@@ -114,10 +116,10 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
 			type = BRASL_EXPOLINE;	/* brasl instruction */
 		else
 			continue;
-		thunk = instr + (*(int *)(instr + 2)) * 2;
+		thunk = instr + (long)(*(int *)(instr + 2)) * 2;
 		if (thunk[0] == 0xc6 && thunk[1] == 0x00)
 			/* exrl %r0,<target-br> */
-			br = thunk + (*(int *)(thunk + 2)) * 2;
+			br = thunk + (long)(*(int *)(thunk + 2)) * 2;
 		else
 			continue;
 		if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 52d4353188ad..5970dd3ee7c5 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -7,17 +7,17 @@
 ssize_t cpu_show_spectre_v1(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+	return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n");
 }
 
 ssize_t cpu_show_spectre_v2(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	if (test_facility(156))
-		return sprintf(buf, "Mitigation: etokens\n");
+		return sysfs_emit(buf, "Mitigation: etokens\n");
 	if (nospec_uses_trampoline())
-		return sprintf(buf, "Mitigation: execute trampolines\n");
-	if (__test_facility(82, alt_stfle_fac_list))
-		return sprintf(buf, "Mitigation: limited branch prediction\n");
-	return sprintf(buf, "Vulnerable\n");
+		return sysfs_emit(buf, "Mitigation: execute trampolines\n");
+	if (nobp_enabled())
+		return sysfs_emit(buf, "Mitigation: limited branch prediction\n");
+	return sysfs_emit(buf, "Vulnerable\n");
 }
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..2fc40f97c0ad 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
 #include <linux/node.h>
 #include <asm/numa.h>
 
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
 void __init numa_setup(void)
 {
 	int nid;
@@ -24,12 +21,8 @@ void __init numa_setup(void)
 	nodes_clear(node_possible_map);
 	node_set(0, node_possible_map);
 	node_set_online(0);
-	for (nid = 0; nid < MAX_NUMNODES; nid++) {
-		NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
-		if (!NODE_DATA(nid))
-			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-			      __func__, sizeof(pg_data_t), 8);
-	}
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
 	NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	NODE_DATA(0)->node_id = 0;
 }
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 1acc2e05d70f..c2a468986212 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,9 +13,13 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/os_info.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
 #include <asm/asm-offsets.h>
+#include <asm/sections.h>
+#include <asm/ipl.h>
 
 /*
  * OS info structure has to be page aligned
@@ -28,7 +32,7 @@ static struct os_info os_info __page_aligned_data;
 u32 os_info_csum(struct os_info *os_info)
 {
 	int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
-	return (__force u32)csum_partial(&os_info->version_major, size, 0);
+	return (__force u32)cksm(&os_info->version_major, size, 0);
 }
 
 /*
@@ -42,13 +46,24 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
 }
 
 /*
- * Add OS info entry and update checksum
+ * Add OS info data entry and update checksum
  */
-void os_info_entry_add(int nr, void *ptr, u64 size)
+void os_info_entry_add_data(int nr, void *ptr, u64 size)
 {
 	os_info.entry[nr].addr = __pa(ptr);
 	os_info.entry[nr].size = size;
-	os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
+	os_info.entry[nr].csum = (__force u32)cksm(ptr, size, 0);
+	os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info value entry and update checksum
+ */
+void os_info_entry_add_val(int nr, u64 value)
+{
+	os_info.entry[nr].val = value;
+	os_info.entry[nr].size = 0;
+	os_info.entry[nr].csum = 0;
 	os_info.csum = os_info_csum(&os_info);
 }
 
@@ -57,13 +72,25 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
  */
 void __init os_info_init(void)
 {
-	void *ptr = &os_info;
+	struct lowcore *abs_lc;
 
+	BUILD_BUG_ON(sizeof(struct os_info) != PAGE_SIZE);
 	os_info.version_major = OS_INFO_VERSION_MAJOR;
 	os_info.version_minor = OS_INFO_VERSION_MINOR;
 	os_info.magic = OS_INFO_MAGIC;
+	os_info_entry_add_val(OS_INFO_IDENTITY_BASE, __identity_base);
+	os_info_entry_add_val(OS_INFO_KASLR_OFFSET, kaslr_offset());
+	os_info_entry_add_val(OS_INFO_KASLR_OFF_PHYS, __kaslr_offset_phys);
+	os_info_entry_add_val(OS_INFO_VMEMMAP, (unsigned long)vmemmap);
+	os_info_entry_add_val(OS_INFO_AMODE31_START, AMODE31_START);
+	os_info_entry_add_val(OS_INFO_AMODE31_END, AMODE31_END);
+	os_info_entry_add_val(OS_INFO_IMAGE_START, (unsigned long)_stext);
+	os_info_entry_add_val(OS_INFO_IMAGE_END, (unsigned long)_end);
+	os_info_entry_add_val(OS_INFO_IMAGE_PHYS, __pa_symbol(_stext));
 	os_info.csum = os_info_csum(&os_info);
-	put_abs_lowcore(os_info, __pa(ptr));
+	abs_lc = get_abs_lowcore();
+	abs_lc->os_info = __pa(&os_info);
+	put_abs_lowcore(abs_lc);
 }
 
 #ifdef CONFIG_CRASH_DUMP
@@ -95,7 +122,7 @@ static void os_info_old_alloc(int nr, int align)
 		msg = "copy failed";
 		goto fail_free;
 	}
-	csum = (__force u32)csum_partial(buf_align, size, 0);
+	csum = (__force u32)cksm(buf_align, size, 0);
 	if (csum != os_info_old->entry[nr].csum) {
 		msg = "checksum failed";
 		goto fail_free;
@@ -122,7 +149,7 @@ static void os_info_old_init(void)
 
 	if (os_info_init)
 		return;
-	if (!oldmem_data.start)
+	if (!oldmem_data.start && !is_ipl_type_dump())
 		goto fail;
 	if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
 		goto fail;
@@ -154,7 +181,7 @@ fail:
 }
 
 /*
- * Return pointer to os infor entry and its size
+ * Return pointer to os info entry and its size
  */
 void *os_info_old_entry(int nr, unsigned long *size)
 {
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index f7dd3c849e68..6a262e198e35 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,7 +2,7 @@
 /*
  * Performance event support for s390x - CPU-measurement Counter Facility
  *
- *  Copyright IBM Corp. 2012, 2021
+ *  Copyright IBM Corp. 2012, 2023
  *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
  *	       Thomas Richter <tmricht@linux.ibm.com>
  */
@@ -16,14 +16,310 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/miscdevice.h>
+#include <linux/perf_event.h>
 
-#include <asm/cpu_mcf.h>
+#include <asm/cpu_mf.h>
 #include <asm/hwctrset.h>
 #include <asm/debug.h>
 
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		0xffffUL  /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG		0xBC000UL /* Event: Counter sets */
+
+enum cpumf_ctr_set {
+	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
+	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
+	CPUMF_CTR_SET_CRYPTO  = 2,    /* Crypto-Activity Counter Set */
+	CPUMF_CTR_SET_EXT     = 3,    /* Extended Counter Set */
+	CPUMF_CTR_SET_MT_DIAG = 4,    /* MT-diagnostic Counter Set */
+
+	/* Maximum number of counter sets */
+	CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT    16
+#define CPUMF_LCCTL_ACTCTL_SHIFT     0
+
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
+{
+	*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
+{
+	*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
+{
+	*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
+{
+	*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+	switch (set) {
+	case CPUMF_CTR_SET_BASIC:
+		return stcctm(BASIC, range, dest);
+	case CPUMF_CTR_SET_USER:
+		return stcctm(PROBLEM_STATE, range, dest);
+	case CPUMF_CTR_SET_CRYPTO:
+		return stcctm(CRYPTO_ACTIVITY, range, dest);
+	case CPUMF_CTR_SET_EXT:
+		return stcctm(EXTENDED, range, dest);
+	case CPUMF_CTR_SET_MT_DIAG:
+		return stcctm(MT_DIAG_CLEARING, range, dest);
+	case CPUMF_CTR_SET_MAX:
+		return 3;
+	}
+	return 3;
+}
+
+struct cpu_cf_events {
+	refcount_t refcnt;		/* Reference count */
+	atomic_t		ctr_set[CPUMF_CTR_SET_MAX];
+	u64			state;		/* For perf_event_open SVC */
+	u64			dev_state;	/* For /dev/hwctr */
+	unsigned int		flags;
+	size_t used;			/* Bytes used in data */
+	size_t usedss;			/* Bytes used in start/stop */
+	unsigned char start[PAGE_SIZE];	/* Counter set at event add */
+	unsigned char stop[PAGE_SIZE];	/* Counter set at event delete */
+	unsigned char data[PAGE_SIZE];	/* Counter set at /dev/hwctr */
+	unsigned int sets;		/* # Counter set saved in memory */
+};
+
 static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
 static debug_info_t *cf_dbg;
 
+/*
+ * The CPU Measurement query counter information instruction contains
+ * information which varies per machine generation, but is constant and
+ * does not change when running on a particular machine, such as counter
+ * first and second version number. This is needed to determine the size
+ * of counter sets. Extract this information at device driver initialization.
+ */
+static struct cpumf_ctr_info	cpumf_ctr_info;
+
+struct cpu_cf_ptr {
+	struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ *   CPU hot plug remove can not happen. Event removal requires a close()
+ *   first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}() and pmu_{start|stop}():
+ *   All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+	struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+	if (p) {
+		struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+		return q->cpucf;
+	}
+	return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+	return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+	lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+	if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+		return;
+	free_percpu(cpu_cf_root.cfptr);
+	cpu_cf_root.cfptr = NULL;
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+	debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
+			    __func__, refcount_read(&cpu_cf_root.refcnt),
+			    !cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+	int rc = 0;
+
+	if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+		return rc;
+
+	/* The memory is already zeroed. */
+	cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+	if (cpu_cf_root.cfptr) {
+		refcount_set(&cpu_cf_root.refcnt, 1);
+		on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	} else {
+		rc = -ENOMEM;
+	}
+
+	return rc;
+}
+
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
+{
+	struct cpu_cf_events *cpuhw;
+	struct cpu_cf_ptr *p;
+
+	mutex_lock(&pmc_reserve_mutex);
+	/*
+	 * When invoked via CPU hotplug handler, there might be no events
+	 * installed or that particular CPU might not have an
+	 * event installed. This anchor pointer can be NULL!
+	 */
+	if (!cpu_cf_root.cfptr)
+		goto out;
+	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+	cpuhw = p->cpucf;
+	/*
+	 * Might be zero when called from CPU hotplug handler and no event
+	 * installed on that CPU, but on different CPUs.
+	 */
+	if (!cpuhw)
+		goto out;
+
+	if (refcount_dec_and_test(&cpuhw->refcnt)) {
+		kfree(cpuhw);
+		p->cpucf = NULL;
+	}
+	cpum_cf_free_root();
+out:
+	mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Takes pmc_reserve_mutex. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+	struct cpu_cf_events *cpuhw;
+	struct cpu_cf_ptr *p;
+	int rc;
+
+	mutex_lock(&pmc_reserve_mutex);
+	rc = cpum_cf_alloc_root();
+	if (rc)
+		goto unlock;
+	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+	cpuhw = p->cpucf;
+
+	if (!cpuhw) {
+		cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+		if (cpuhw) {
+			p->cpucf = cpuhw;
+			refcount_set(&cpuhw->refcnt, 1);
+		} else {
+			rc = -ENOMEM;
+		}
+	} else {
+		refcount_inc(&cpuhw->refcnt);
+	}
+	if (rc) {
+		/*
+		 * Error in allocation of event, decrement anchor. Since
+		 * cpu_cf_event is not created, its destroy() function is not
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		cpum_cf_free_root();
+	}
+unlock:
+	mutex_unlock(&pmc_reserve_mutex);
+	return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is not -1, install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+	cpumask_var_t mask;
+	int rc;
+
+	if (cpu == -1) {
+		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+			return -ENOMEM;
+		for_each_online_cpu(cpu) {
+			rc = cpum_cf_alloc_cpu(cpu);
+			if (rc) {
+				for_each_cpu(cpu, mask)
+					cpum_cf_free_cpu(cpu);
+				break;
+			}
+			cpumask_set_cpu(cpu, mask);
+		}
+		free_cpumask_var(mask);
+	} else {
+		rc = cpum_cf_alloc_cpu(cpu);
+	}
+	return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+	if (cpu == -1) {
+		for_each_online_cpu(cpu)
+			cpum_cf_free_cpu(cpu);
+	} else {
+		cpum_cf_free_cpu(cpu);
+	}
+}
+
 #define	CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
 						/* interval in seconds */
 
@@ -96,11 +392,10 @@ struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
 /* Create the trailer data at the end of a page. */
 static void cfdiag_trailer(struct cf_trailer_entry *te)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
 	struct cpuid cpuid;
 
-	te->cfvn = cpuhw->info.cfvn;		/* Counter version numbers */
-	te->csvn = cpuhw->info.csvn;
+	te->cfvn = cpumf_ctr_info.cfvn;		/* Counter version numbers */
+	te->csvn = cpumf_ctr_info.csvn;
 
 	get_cpu_id(&cpuid);			/* Machine type */
 	te->mach_type = cpuid.machine;
@@ -112,6 +407,63 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
 	te->timestamp = get_tod_clock_fast();
 }
 
+/*
+ * The number of counters per counter set varies between machine generations,
+ * but is constant when running on a particular machine generation.
+ * Determine each counter set size at device driver initialization and
+ * retrieve it later.
+ */
+static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
+static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
+{
+	size_t ctrset_size = 0;
+
+	switch (ctrset) {
+	case CPUMF_CTR_SET_BASIC:
+		if (cpumf_ctr_info.cfvn >= 1)
+			ctrset_size = 6;
+		break;
+	case CPUMF_CTR_SET_USER:
+		if (cpumf_ctr_info.cfvn == 1)
+			ctrset_size = 6;
+		else if (cpumf_ctr_info.cfvn >= 3)
+			ctrset_size = 2;
+		break;
+	case CPUMF_CTR_SET_CRYPTO:
+		if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
+			ctrset_size = 16;
+		else if (cpumf_ctr_info.csvn >= 6)
+			ctrset_size = 20;
+		break;
+	case CPUMF_CTR_SET_EXT:
+		if (cpumf_ctr_info.csvn == 1)
+			ctrset_size = 32;
+		else if (cpumf_ctr_info.csvn == 2)
+			ctrset_size = 48;
+		else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
+			ctrset_size = 128;
+		else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
+			ctrset_size = 160;
+		break;
+	case CPUMF_CTR_SET_MT_DIAG:
+		if (cpumf_ctr_info.csvn > 3)
+			ctrset_size = 48;
+		break;
+	case CPUMF_CTR_SET_MAX:
+		break;
+	}
+	cpumf_ctr_setsizes[ctrset] = ctrset_size;
+}
+
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
+{
+	return cpumf_ctr_setsizes[ctrset];
+}
+
 /* Read a counter set. The counter set number determines the counter set and
  * the CPUM-CF first and second version number determine the number of
  * available counters in each counter set.
@@ -130,14 +482,13 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
 			       size_t room, bool error_ok)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
 	size_t ctrset_size, need = 0;
 	int rc = 3;				/* Assume write failure */
 
 	ctrdata->def = CF_DIAG_CTRSET_DEF;
 	ctrdata->set = ctrset;
 	ctrdata->res1 = 0;
-	ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
+	ctrset_size = cpum_cf_read_setsize(ctrset);
 
 	if (ctrset_size) {			/* Save data */
 		need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
@@ -151,10 +502,6 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
 			need = 0;
 	}
 
-	debug_sprintf_event(cf_dbg, 3,
-			    "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
-			    " need %zd rc %d\n", __func__, ctrset, ctrset_size,
-			    cpuhw->info.cfvn, cpuhw->info.csvn, need, rc);
 	return need;
 }
 
@@ -213,25 +560,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
 	struct cf_trailer_entry *trailer_start, *trailer_stop;
 	struct cf_ctrset_entry *ctrstart, *ctrstop;
 	size_t offset = 0;
+	int i;
 
-	auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
-	do {
+	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
 		ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
 		ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
 
+		/* Counter set not authorized */
+		if (!(auth & cpumf_ctr_ctl[i]))
+			continue;
+		/* Counter set size zero was not saved */
+		if (!cpum_cf_read_setsize(i))
+			continue;
+
 		if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
 			pr_err_once("cpum_cf_diag counter set compare error "
 				    "in set %i\n", ctrstart->set);
 			return 0;
 		}
-		auth &= ~cpumf_ctr_ctl[ctrstart->set];
 		if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
 			cfdiag_diffctrset((u64 *)(ctrstart + 1),
 					  (u64 *)(ctrstop + 1), ctrstart->ctr);
 			offset += ctrstart->ctr * sizeof(u64) +
 							sizeof(*ctrstart);
 		}
-	} while (ctrstart->def && auth);
+	}
 
 	/* Save time_stamp from start of event in stop's trailer */
 	trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
@@ -259,40 +612,35 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
 	return set;
 }
 
-static int validate_ctr_version(const struct hw_perf_event *hwc,
-				enum cpumf_ctr_set set)
+static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
 {
-	struct cpu_cf_events *cpuhw;
-	int err = 0;
 	u16 mtdiag_ctl;
-
-	cpuhw = &get_cpu_var(cpu_cf_events);
+	int err = 0;
 
 	/* check required version for counter sets */
 	switch (set) {
 	case CPUMF_CTR_SET_BASIC:
 	case CPUMF_CTR_SET_USER:
-		if (cpuhw->info.cfvn < 1)
+		if (cpumf_ctr_info.cfvn < 1)
 			err = -EOPNOTSUPP;
 		break;
 	case CPUMF_CTR_SET_CRYPTO:
-		if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
-		     hwc->config > 79) ||
-		    (cpuhw->info.csvn >= 6 && hwc->config > 83))
+		if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
+		     config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
 			err = -EOPNOTSUPP;
 		break;
 	case CPUMF_CTR_SET_EXT:
-		if (cpuhw->info.csvn < 1)
+		if (cpumf_ctr_info.csvn < 1)
 			err = -EOPNOTSUPP;
-		if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
-		    (cpuhw->info.csvn == 2 && hwc->config > 175) ||
-		    (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
-		     && hwc->config > 255) ||
-		    (cpuhw->info.csvn >= 6 && hwc->config > 287))
+		if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
+		    (cpumf_ctr_info.csvn == 2 && config > 175) ||
+		    (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
+		     config > 255) ||
+		    (cpumf_ctr_info.csvn >= 6 && config > 287))
 			err = -EOPNOTSUPP;
 		break;
 	case CPUMF_CTR_SET_MT_DIAG:
-		if (cpuhw->info.csvn <= 3)
+		if (cpumf_ctr_info.csvn <= 3)
 			err = -EOPNOTSUPP;
 		/*
 		 * MT-diagnostic counters are read-only.  The counter set
@@ -307,35 +655,15 @@ static int validate_ctr_version(const struct hw_perf_event *hwc,
 		 * counter set is enabled and active.
 		 */
 		mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
-		if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
-		      (cpuhw->info.enable_ctl & mtdiag_ctl) &&
-		      (cpuhw->info.act_ctl & mtdiag_ctl)))
+		if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
+		      (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
+		      (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
 			err = -EOPNOTSUPP;
 		break;
 	case CPUMF_CTR_SET_MAX:
 		err = -EOPNOTSUPP;
 	}
 
-	put_cpu_var(cpu_cf_events);
-	return err;
-}
-
-static int validate_ctr_auth(const struct hw_perf_event *hwc)
-{
-	struct cpu_cf_events *cpuhw;
-	int err = 0;
-
-	cpuhw = &get_cpu_var(cpu_cf_events);
-
-	/* Check authorization for cpu counter sets.
-	 * If the particular CPU counter set is not authorized,
-	 * return with -ENOENT in order to fall back to other
-	 * PMUs that might suffice the event request.
-	 */
-	if (!(hwc->config_base & cpuhw->info.auth_ctl))
-		err = -ENOENT;
-
-	put_cpu_var(cpu_cf_events);
 	return err;
 }
 
@@ -346,20 +674,17 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc)
  */
 static void cpumf_pmu_enable(struct pmu *pmu)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	int err;
 
-	if (cpuhw->flags & PMU_F_ENABLED)
+	if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
 		return;
 
 	err = lcctl(cpuhw->state | cpuhw->dev_state);
-	if (err) {
-		pr_err("Enabling the performance measuring unit "
-		       "failed with rc=%x\n", err);
-		return;
-	}
-
-	cpuhw->flags |= PMU_F_ENABLED;
+	if (err)
+		pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
+	else
+		cpuhw->flags |= PMU_F_ENABLED;
 }
 
 /*
@@ -369,40 +694,26 @@ static void cpumf_pmu_enable(struct pmu *pmu)
  */
 static void cpumf_pmu_disable(struct pmu *pmu)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-	int err;
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	u64 inactive;
+	int err;
 
-	if (!(cpuhw->flags & PMU_F_ENABLED))
+	if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
 		return;
 
 	inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
 	inactive |= cpuhw->dev_state;
 	err = lcctl(inactive);
-	if (err) {
-		pr_err("Disabling the performance measuring unit "
-		       "failed with rc=%x\n", err);
-		return;
-	}
-
-	cpuhw->flags &= ~PMU_F_ENABLED;
+	if (err)
+		pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
+	else
+		cpuhw->flags &= ~PMU_F_ENABLED;
 }
 
-
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
 /* Release the PMU if event is the last perf event */
 static void hw_perf_event_destroy(struct perf_event *event)
 {
-	if (!atomic_add_unless(&num_events, -1, 1)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_dec_return(&num_events) == 0)
-			__kernel_cpumcf_end();
-		mutex_unlock(&pmc_reserve_mutex);
-	}
+	cpum_cf_free(event->cpu);
 }
 
 /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -426,12 +737,10 @@ static const int cpumf_generic_events_user[] = {
 	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
 };
 
-static void cpumf_hw_inuse(void)
+static int is_userspace_event(u64 ev)
 {
-	mutex_lock(&pmc_reserve_mutex);
-	if (atomic_inc_return(&num_events) == 1)
-		__kernel_cpumcf_begin();
-	mutex_unlock(&pmc_reserve_mutex);
+	return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+	       cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
 }
 
 static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
@@ -439,7 +748,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
 	enum cpumf_ctr_set set;
-	int err = 0;
 	u64 ev;
 
 	switch (type) {
@@ -456,19 +764,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
 		if (is_sampling_event(event))	/* No sampling support */
 			return -ENOENT;
 		ev = attr->config;
-		/* Count user space (problem-state) only */
 		if (!attr->exclude_user && attr->exclude_kernel) {
-			if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
-				return -EOPNOTSUPP;
-			ev = cpumf_generic_events_user[ev];
-
-		/* No support for kernel space counters only */
+			/*
+			 * Count user space (problem-state) only
+			 * Handle events 32 and 33 as 0:u and 1:u
+			 */
+			if (!is_userspace_event(ev)) {
+				if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+					return -EOPNOTSUPP;
+				ev = cpumf_generic_events_user[ev];
+			}
 		} else if (!attr->exclude_kernel && attr->exclude_user) {
+			/* No support for kernel space counters only */
 			return -EOPNOTSUPP;
-		} else {	/* Count user and kernel space */
-			if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
-				return -EOPNOTSUPP;
-			ev = cpumf_generic_events_basic[ev];
+		} else {
+			/* Count user and kernel space, incl. events 32 + 33 */
+			if (!is_userspace_event(ev)) {
+				if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+					return -EOPNOTSUPP;
+				ev = cpumf_generic_events_basic[ev];
+			}
 		}
 		break;
 
@@ -505,18 +820,22 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
 	}
 
 	/* Initialize for using the CPU-measurement counter facility */
-	cpumf_hw_inuse();
+	if (cpum_cf_alloc(event->cpu))
+		return -ENOMEM;
 	event->destroy = hw_perf_event_destroy;
 
-	/* Finally, validate version and authorization of the counter set */
-	err = validate_ctr_auth(hwc);
-	if (!err)
-		err = validate_ctr_version(hwc, set);
-
-	return err;
+	/*
+	 * Finally, validate version and authorization of the counter set.
+	 * If the particular CPU counter set is not authorized,
+	 * return with -ENOENT in order to fall back to other
+	 * PMUs that might suffice the event request.
+	 */
+	if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
+		return -ENOENT;
+	return validate_ctr_version(hwc->config, set);
 }
 
-/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
  * attribute::type values:
  * - PERF_TYPE_HARDWARE:
  * - pmu->type:
@@ -539,18 +858,13 @@ static int cpumf_pmu_event_type(struct perf_event *event)
 static int cpumf_pmu_event_init(struct perf_event *event)
 {
 	unsigned int type = event->attr.type;
-	int err;
+	int err = -ENOENT;
 
 	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
 		err = __hw_perf_event_init(event, type);
 	else if (event->pmu->type == type)
 		/* Registered as unknown PMU */
 		err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
-	else
-		return -ENOENT;
-
-	if (unlikely(err) && event->destroy)
-		event->destroy(event);
 
 	return err;
 }
@@ -560,8 +874,8 @@ static int hw_perf_event_reset(struct perf_event *event)
 	u64 prev, new;
 	int err;
 
+	prev = local64_read(&event->hw.prev_count);
 	do {
-		prev = local64_read(&event->hw.prev_count);
 		err = ecctr(event->hw.config, &new);
 		if (err) {
 			if (err != 3)
@@ -573,7 +887,7 @@ static int hw_perf_event_reset(struct perf_event *event)
 			 */
 			new = 0;
 		}
-	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+	} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
 
 	return err;
 }
@@ -583,12 +897,12 @@ static void hw_perf_event_update(struct perf_event *event)
 	u64 prev, new, delta;
 	int err;
 
+	prev = local64_read(&event->hw.prev_count);
 	do {
-		prev = local64_read(&event->hw.prev_count);
 		err = ecctr(event->hw.config, &new);
 		if (err)
 			return;
-	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+	} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
 
 	delta = (prev <= new) ? new - prev
 			      : (-1ULL - prev) + new + 1;	 /* overflow */
@@ -605,7 +919,7 @@ static void cpumf_pmu_read(struct perf_event *event)
 
 static void cpumf_pmu_start(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct hw_perf_event *hwc = &event->hw;
 	int i;
 
@@ -662,17 +976,10 @@ static int cfdiag_push_sample(struct perf_event *event,
 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 		raw.frag.size = cpuhw->usedss;
 		raw.frag.data = cpuhw->stop;
-		raw.size = raw.frag.size;
-		data.raw = &raw;
+		perf_sample_save_raw_data(&data, event, &raw);
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
-	debug_sprintf_event(cf_dbg, 3,
-			    "%s event %#llx sample_type %#llx raw %d ov %d\n",
-			    __func__, event->hw.config,
-			    event->attr.sample_type, raw.size, overflow);
-	if (overflow)
-		event->pmu->stop(event, 0);
 
 	perf_event_update_userpage(event);
 	return overflow;
@@ -680,7 +987,7 @@ static int cfdiag_push_sample(struct perf_event *event,
 
 static void cpumf_pmu_stop(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct hw_perf_event *hwc = &event->hw;
 	int i;
 
@@ -707,8 +1014,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
 						      false);
 			if (cfdiag_diffctr(cpuhw, event->hw.config_base))
 				cfdiag_push_sample(event, cpuhw);
-		} else if (cpuhw->flags & PMU_F_RESERVED) {
-			/* Only update when PMU not hotplugged off */
+		} else {
 			hw_perf_event_update(event);
 		}
 		hwc->state |= PERF_HES_UPTODATE;
@@ -717,7 +1023,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
 
 static int cpumf_pmu_add(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 
 	ctr_set_enable(&cpuhw->state, event->hw.config_base);
 	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -730,7 +1036,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
 
 static void cpumf_pmu_del(struct perf_event *event, int flags)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	int i;
 
 	cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -741,7 +1047,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
 	 *
 	 * When a new perf event has been added but not yet started, this can
 	 * clear enable control and resets all counters in a set.  Therefore,
-	 * cpumf_pmu_start() always has to reenable a counter set.
+	 * cpumf_pmu_start() always has to re-enable a counter set.
 	 */
 	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
 		if (!atomic_read(&cpuhw->ctr_set[i]))
@@ -762,31 +1068,172 @@ static struct pmu cpumf_pmu = {
 	.read	      = cpumf_pmu_read,
 };
 
+static struct cfset_session {		/* CPUs and counter set bit mask */
+	struct list_head head;		/* Head of list of active processes */
+} cfset_session = {
+	.head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0);	/* Access count */
+/*
+ * Synchronize access to device /dev/hwctr. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwctr.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done in kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor cannot be created
+ *   and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ *   pmu_delete(). The event itself is removed when the file descriptor is
+ *   closed.
+ */
+static int cfset_online_cpu(unsigned int cpu);
+
+static int cpum_cf_online_cpu(unsigned int cpu)
+{
+	int rc = 0;
+
+	/*
+	 * Ignore notification for perf_event_open().
+	 * Handle only /dev/hwctr device sessions.
+	 */
+	mutex_lock(&cfset_ctrset_mutex);
+	if (refcount_read(&cfset_opencnt)) {
+		rc = cpum_cf_alloc_cpu(cpu);
+		if (!rc)
+			cfset_online_cpu(cpu);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return rc;
+}
+
+static int cfset_offline_cpu(unsigned int cpu);
+
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+	/*
+	 * During task exit processing of grouped perf events triggered by CPU
+	 * hotplug processing, pmu_disable() is called as part of perf context
+	 * removal process. Therefore do not trigger event removal now for
+	 * perf_event_open() created events. Perf common code triggers event
+	 * destruction when the event file descriptor is closed.
+	 *
+	 * Handle only /dev/hwctr device sessions.
+	 */
+	mutex_lock(&cfset_ctrset_mutex);
+	if (refcount_read(&cfset_opencnt)) {
+		cfset_offline_cpu(cpu);
+		cpum_cf_free_cpu(cpu);
+	}
+	mutex_unlock(&cfset_ctrset_mutex);
+	return 0;
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+	return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_cf_events *cpuhw;
+
+	if (!(alert & CPU_MF_INT_CF_MASK))
+		return;
+
+	inc_irq_stat(IRQEXT_CMC);
+
+	/*
+	 * Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case.
+	 */
+	cpuhw = this_cpu_cfhw();
+	if (!cpuhw)
+		return;
+
+	/* counter authorization change alert */
+	if (alert & CPU_MF_INT_CF_CACA)
+		qctri(&cpumf_ctr_info);
+
+	/* loss of counter data alert */
+	if (alert & CPU_MF_INT_CF_LCDA)
+		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+	/* loss of MT counter data alert */
+	if (alert & CPU_MF_INT_CF_MTDA)
+		pr_warn("CPU[%i] MT counter data was lost\n",
+			smp_processor_id());
+}
+
 static int cfset_init(void);
 static int __init cpumf_pmu_init(void)
 {
 	int rc;
 
-	if (!kernel_cpumcf_avail())
+	/* Extract counter measurement facility information */
+	if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
 		return -ENODEV;
 
+	/* Determine and store counter set sizes for later reference */
+	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+		cpum_cf_make_setsize(rc);
+
+	/*
+	 * Clear bit 15 of cr0 to unauthorize problem-state to
+	 * extract measurement counters
+	 */
+	system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT);
+
+	/* register handler for measurement-alert interruptions */
+	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				   cpumf_measurement_alert);
+	if (rc) {
+		pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+		return rc;
+	}
+
 	/* Setup s390dbf facility */
 	cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
 	if (!cf_dbg) {
 		pr_err("Registration of s390dbf(cpum_cf) failed\n");
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto out1;
 	}
 	debug_register_view(cf_dbg, &debug_sprintf_view);
 
 	cpumf_pmu.attr_groups = cpumf_cf_event_group();
 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
 	if (rc) {
-		debug_unregister_view(cf_dbg, &debug_sprintf_view);
-		debug_unregister(cf_dbg);
 		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+		goto out2;
 	} else if (stccm_avail()) {	/* Setup counter set device */
 		cfset_init();
 	}
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+			       "perf/s390/cf:online",
+			       cpum_cf_online_cpu, cpum_cf_offline_cpu);
+	return rc;
+
+out2:
+	debug_unregister_view(cf_dbg, &debug_sprintf_view);
+	debug_unregister(cf_dbg);
+out1:
+	unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
 	return rc;
 }
 
@@ -795,19 +1242,11 @@ static int __init cpumf_pmu_init(void)
  * counter set via normal file operations.
  */
 
-static atomic_t cfset_opencnt = ATOMIC_INIT(0);		/* Access count */
-static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
 struct cfset_call_on_cpu_parm {		/* Parm struct for smp_call_on_cpu */
 	unsigned int sets;		/* Counter set bit mask */
 	atomic_t cpus_ack;		/* # CPUs successfully executed func */
 };
 
-static struct cfset_session {		/* CPUs and counter set bit mask */
-	struct list_head head;		/* Head of list of active processes */
-} cfset_session = {
-	.head = LIST_HEAD_INIT(cfset_session.head)
-};
-
 struct cfset_request {			/* CPUs and counter set bit mask */
 	unsigned long ctrset;		/* Bit mask of counter set to read */
 	cpumask_t mask;			/* CPU mask to read from */
@@ -869,11 +1308,11 @@ static void cfset_session_add(struct cfset_request *p)
 /* Stop all counter sets via ioctl interface */
 static void cfset_ioctl_off(void *parm)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct cfset_call_on_cpu_parm *p = parm;
 	int rc;
 
-	/* Check if any counter set used by /dev/hwc */
+	/* Check if any counter set used by /dev/hwctr */
 	for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
 		if ((p->sets & cpumf_ctr_ctl[rc])) {
 			if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
@@ -890,14 +1329,12 @@ static void cfset_ioctl_off(void *parm)
 		       cpuhw->state, S390_HWCTR_DEVICE, rc);
 	if (!cpuhw->dev_state)
 		cpuhw->flags &= ~PMU_F_IN_USE;
-	debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
-			    __func__, rc, cpuhw->state, cpuhw->dev_state);
 }
 
 /* Start counter sets on particular CPU */
 static void cfset_ioctl_on(void *parm)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct cfset_call_on_cpu_parm *p = parm;
 	int rc;
 
@@ -913,17 +1350,13 @@ static void cfset_ioctl_on(void *parm)
 	else
 		pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
 		       cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
-	debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
-			    __func__, rc, cpuhw->state, cpuhw->dev_state);
 }
 
 static void cfset_release_cpu(void *p)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	int rc;
 
-	debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
-			    __func__, cpuhw->state, cpuhw->dev_state);
 	cpuhw->dev_state = 0;
 	rc = lcctl(cpuhw->state);	/* Keep perf_event_open counter sets */
 	if (rc)
@@ -959,27 +1392,41 @@ static int cfset_release(struct inode *inode, struct file *file)
 		kfree(file->private_data);
 		file->private_data = NULL;
 	}
-	if (!atomic_dec_return(&cfset_opencnt))
+	if (refcount_dec_and_test(&cfset_opencnt)) {	/* Last close */
 		on_each_cpu(cfset_release_cpu, NULL, 1);
+		cpum_cf_free(-1);
+	}
 	mutex_unlock(&cfset_ctrset_mutex);
-
-	hw_perf_event_destroy(NULL);
 	return 0;
 }
 
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
 static int cfset_open(struct inode *inode, struct file *file)
 {
-	if (!capable(CAP_SYS_ADMIN))
+	int rc = 0;
+
+	if (!perfmon_capable())
 		return -EPERM;
+	file->private_data = NULL;
+
 	mutex_lock(&cfset_ctrset_mutex);
-	if (atomic_inc_return(&cfset_opencnt) == 1)
-		cfset_session_init();
+	if (!refcount_inc_not_zero(&cfset_opencnt)) {	/* First open */
+		rc = cpum_cf_alloc(-1);
+		if (!rc) {
+			cfset_session_init();
+			refcount_set(&cfset_opencnt, 1);
+		}
+	}
 	mutex_unlock(&cfset_ctrset_mutex);
 
-	cpumf_hw_inuse();
-	file->private_data = NULL;
 	/* nonseekable_open() never fails */
-	return nonseekable_open(inode, file);
+	return rc ?: nonseekable_open(inode, file);
 }
 
 static int cfset_all_start(struct cfset_request *req)
@@ -998,13 +1445,11 @@ static int cfset_all_start(struct cfset_request *req)
 	if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
 		on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
 		rc = -EIO;
-		debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__);
 	}
 	free_cpumask_var(mask);
 	return rc;
 }
 
-
 /* Return the maximum required space for all possible CPUs in case one
  * CPU will be onlined during the START, READ, STOP cycles.
  * To find out the size of the counter sets, any one CPU will do. They
@@ -1012,34 +1457,32 @@ static int cfset_all_start(struct cfset_request *req)
  */
 static size_t cfset_needspace(unsigned int sets)
 {
-	struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
 	size_t bytes = 0;
 	int i;
 
 	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
 		if (!(sets & cpumf_ctr_ctl[i]))
 			continue;
-		bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+		bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
 			 sizeof(((struct s390_ctrset_setdata *)0)->set) +
 			 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
 	}
 	bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
 		(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
 		     sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
-	put_cpu_ptr(&cpu_cf_events);
 	return bytes;
 }
 
 static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
 {
 	struct s390_ctrset_read __user *ctrset_read;
-	unsigned int cpu, cpus, rc;
+	unsigned int cpu, cpus, rc = 0;
 	void __user *uptr;
 
 	ctrset_read = (struct s390_ctrset_read __user *)arg;
 	uptr = ctrset_read->data;
 	for_each_cpu(cpu, mask) {
-		struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+		struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
 		struct s390_ctrset_cpudata __user *ctrset_cpudata;
 
 		ctrset_cpudata = uptr;
@@ -1047,17 +1490,18 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
 		rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
 		rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
 				   cpuhw->used);
-		if (rc)
-			return -EFAULT;
+		if (rc) {
+			rc = -EFAULT;
+			goto out;
+		}
 		uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
 		cond_resched();
 	}
 	cpus = cpumask_weight(mask);
 	if (put_user(cpus, &ctrset_read->no_cpus))
-		return -EFAULT;
-	debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__,
-			    uptr - (void __user *)ctrset_read->data);
-	return 0;
+		rc = -EFAULT;
+out:
+	return rc;
 }
 
 static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
@@ -1080,7 +1524,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
 /* Read all counter sets. */
 static void cfset_cpu_read(void *parm)
 {
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
 	struct cfset_call_on_cpu_parm *p = parm;
 	int set, set_size;
 	size_t space;
@@ -1097,7 +1541,7 @@ static void cfset_cpu_read(void *parm)
 
 		if (!(p->sets & cpumf_ctr_ctl[set]))
 			continue;	/* Counter set not in list */
-		set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
+		set_size = cpum_cf_read_setsize(set);
 		space = sizeof(cpuhw->data) - cpuhw->used;
 		space = cfset_cpuset_read(sp, set, set_size, space);
 		if (space) {
@@ -1105,8 +1549,6 @@ static void cfset_cpu_read(void *parm)
 			cpuhw->sets += 1;
 		}
 	}
-	debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
-			    cpuhw->sets, cpuhw->used);
 }
 
 static int cfset_all_read(unsigned long arg, struct cfset_request *req)
@@ -1128,14 +1570,10 @@ static int cfset_all_read(unsigned long arg, struct cfset_request *req)
 
 static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
 {
-	struct s390_ctrset_read read;
 	int ret = -ENODATA;
 
-	if (req && req->ctrset) {
-		if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
-			return -EFAULT;
+	if (req && req->ctrset)
 		ret = cfset_all_read(arg, req);
-	}
 	return ret;
 }
 
@@ -1204,8 +1642,6 @@ static long cfset_ioctl_start(unsigned long arg, struct file *file)
 	if (!ret) {
 		cfset_session_add(preq);
 		file->private_data = preq;
-		debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n",
-				    __func__, preq->ctrset, need, ret);
 	} else {
 		kfree(preq);
 	}
@@ -1255,24 +1691,23 @@ static const struct file_operations cfset_fops = {
 	.release = cfset_release,
 	.unlocked_ioctl	= cfset_ioctl,
 	.compat_ioctl = cfset_ioctl,
-	.llseek = no_llseek
 };
 
 static struct miscdevice cfset_dev = {
 	.name	= S390_HWCTR_DEVICE,
 	.minor	= MISC_DYNAMIC_MINOR,
 	.fops	= &cfset_fops,
+	.mode	= 0666,
 };
 
 /* Hotplug add of a CPU. Scan through all active processes and add
  * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
  */
-int cfset_online_cpu(unsigned int cpu)
+static int cfset_online_cpu(unsigned int cpu)
 {
 	struct cfset_call_on_cpu_parm p;
 	struct cfset_request *rp;
 
-	mutex_lock(&cfset_ctrset_mutex);
 	if (!list_empty(&cfset_session.head)) {
 		list_for_each_entry(rp, &cfset_session.head, node) {
 			p.sets = rp->ctrset;
@@ -1280,19 +1715,18 @@ int cfset_online_cpu(unsigned int cpu)
 			cpumask_set_cpu(cpu, &rp->mask);
 		}
 	}
-	mutex_unlock(&cfset_ctrset_mutex);
 	return 0;
 }
 
 /* Hotplug remove of a CPU. Scan through all active processes and clear
  * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
  */
-int cfset_offline_cpu(unsigned int cpu)
+static int cfset_offline_cpu(unsigned int cpu)
 {
 	struct cfset_call_on_cpu_parm p;
 	struct cfset_request *rp;
 
-	mutex_lock(&cfset_ctrset_mutex);
 	if (!list_empty(&cfset_session.head)) {
 		list_for_each_entry(rp, &cfset_session.head, node) {
 			p.sets = rp->ctrset;
@@ -1300,28 +1734,22 @@ int cfset_offline_cpu(unsigned int cpu)
 			cpumask_clear_cpu(cpu, &rp->mask);
 		}
 	}
-	mutex_unlock(&cfset_ctrset_mutex);
 	return 0;
 }
 
 static void cfdiag_read(struct perf_event *event)
 {
-	debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__,
-			    event->attr.config, local64_read(&event->count));
 }
 
 static int get_authctrsets(void)
 {
-	struct cpu_cf_events *cpuhw;
 	unsigned long auth = 0;
 	enum cpumf_ctr_set i;
 
-	cpuhw = &get_cpu_var(cpu_cf_events);
 	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
-		if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+		if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
 			auth |= cpumf_ctr_ctl[i];
 	}
-	put_cpu_var(cpu_cf_events);
 	return auth;
 }
 
@@ -1355,8 +1783,6 @@ static int cfdiag_event_init2(struct perf_event *event)
 	if (!event->hw.config_base)
 		err = -EINVAL;
 
-	debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n",
-			    __func__, err, event->hw.config_base);
 	return err;
 }
 
@@ -1381,12 +1807,11 @@ static int cfdiag_event_init(struct perf_event *event)
 	}
 
 	/* Initialize for using the CPU-measurement counter facility */
-	cpumf_hw_inuse();
+	if (cpum_cf_alloc(event->cpu))
+		return -ENOMEM;
 	event->destroy = hw_perf_event_destroy;
 
 	err = cfdiag_event_init2(event);
-	if (unlikely(err))
-		event->destroy(event);
 out:
 	return err;
 }
@@ -1429,7 +1854,7 @@ static const struct attribute_group *cfdiag_attr_groups[] = {
 /* Performance monitoring unit for event CF_DIAG. Since this event
  * is also started and stopped via the perf_event_open() system call, use
  * the same event enable/disable call back functions. They do not
- * have a pointer to the perf_event strcture as first parameter.
+ * have a pointer to the perf_event structure as first parameter.
  *
  * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
  * Reuse them and distinguish the event (always first parameter) via
@@ -1459,7 +1884,7 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
 	enum cpumf_ctr_set i;
 
 	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
-		size_t size = cpum_cf_ctrset_size(i, info);
+		size_t size = cpum_cf_read_setsize(i);
 
 		if (size)
 			max_size += size * sizeof(u64) +
@@ -1493,16 +1918,12 @@ static void cfdiag_get_cpu_speed(void)
 
 static int cfset_init(void)
 {
-	struct cpumf_ctr_info info;
 	size_t need;
 	int rc;
 
-	if (qctri(&info))
-		return -ENODEV;
-
 	cfdiag_get_cpu_speed();
 	/* Make sure the counter set data fits into predefined buffer. */
-	need = cfdiag_maxsize(&info);
+	need = cfdiag_maxsize(&cpumf_ctr_info);
 	if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
 		pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
 		       need);
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
deleted file mode 100644
index 8ee48672233f..000000000000
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ /dev/null
@@ -1,233 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CPU-Measurement Counter Facility Support - Common Layer
- *
- *  Copyright IBM Corp. 2019
- *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
- */
-#define KMSG_COMPONENT	"cpum_cf_common"
-#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <asm/ctl_reg.h>
-#include <asm/irq.h>
-#include <asm/cpu_mcf.h>
-
-/* Per-CPU event structure for the counter facility */
-DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = {
-	.ctr_set = {
-		[CPUMF_CTR_SET_BASIC]	= ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_USER]	= ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_CRYPTO]	= ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_EXT]	= ATOMIC_INIT(0),
-		[CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
-	},
-	.alert = ATOMIC64_INIT(0),
-	.state = 0,
-	.dev_state = 0,
-	.flags = 0,
-	.used = 0,
-	.usedss = 0,
-	.sets = 0
-};
-/* Indicator whether the CPU-Measurement Counter Facility Support is ready */
-static bool cpum_cf_initalized;
-
-/* CPU-measurement alerts for the counter facility */
-static void cpumf_measurement_alert(struct ext_code ext_code,
-				    unsigned int alert, unsigned long unused)
-{
-	struct cpu_cf_events *cpuhw;
-
-	if (!(alert & CPU_MF_INT_CF_MASK))
-		return;
-
-	inc_irq_stat(IRQEXT_CMC);
-	cpuhw = this_cpu_ptr(&cpu_cf_events);
-
-	/* Measurement alerts are shared and might happen when the PMU
-	 * is not reserved.  Ignore these alerts in this case. */
-	if (!(cpuhw->flags & PMU_F_RESERVED))
-		return;
-
-	/* counter authorization change alert */
-	if (alert & CPU_MF_INT_CF_CACA)
-		qctri(&cpuhw->info);
-
-	/* loss of counter data alert */
-	if (alert & CPU_MF_INT_CF_LCDA)
-		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
-
-	/* loss of MT counter data alert */
-	if (alert & CPU_MF_INT_CF_MTDA)
-		pr_warn("CPU[%i] MT counter data was lost\n",
-			smp_processor_id());
-
-	/* store alert for special handling by in-kernel users */
-	atomic64_or(alert, &cpuhw->alert);
-}
-
-#define PMC_INIT      0
-#define PMC_RELEASE   1
-static void cpum_cf_setup_cpu(void *flags)
-{
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
-	switch (*((int *) flags)) {
-	case PMC_INIT:
-		memset(&cpuhw->info, 0, sizeof(cpuhw->info));
-		qctri(&cpuhw->info);
-		cpuhw->flags |= PMU_F_RESERVED;
-		break;
-
-	case PMC_RELEASE:
-		cpuhw->flags &= ~PMU_F_RESERVED;
-		break;
-	}
-
-	/* Disable CPU counter sets */
-	lcctl(0);
-}
-
-bool kernel_cpumcf_avail(void)
-{
-	return cpum_cf_initalized;
-}
-EXPORT_SYMBOL(kernel_cpumcf_avail);
-
-/* Initialize the CPU-measurement counter facility */
-int __kernel_cpumcf_begin(void)
-{
-	int flags = PMC_INIT;
-
-	on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
-	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
-	return 0;
-}
-EXPORT_SYMBOL(__kernel_cpumcf_begin);
-
-/* Obtain the CPU-measurement alerts for the counter facility */
-unsigned long kernel_cpumcf_alert(int clear)
-{
-	struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-	unsigned long alert;
-
-	alert = atomic64_read(&cpuhw->alert);
-	if (clear)
-		atomic64_set(&cpuhw->alert, 0);
-
-	return alert;
-}
-EXPORT_SYMBOL(kernel_cpumcf_alert);
-
-/* Release the CPU-measurement counter facility */
-void __kernel_cpumcf_end(void)
-{
-	int flags = PMC_RELEASE;
-
-	on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
-	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-EXPORT_SYMBOL(__kernel_cpumcf_end);
-
-static int cpum_cf_setup(unsigned int cpu, int flags)
-{
-	local_irq_disable();
-	cpum_cf_setup_cpu(&flags);
-	local_irq_enable();
-	return 0;
-}
-
-static int cpum_cf_online_cpu(unsigned int cpu)
-{
-	cpum_cf_setup(cpu, PMC_INIT);
-	return cfset_online_cpu(cpu);
-}
-
-static int cpum_cf_offline_cpu(unsigned int cpu)
-{
-	cfset_offline_cpu(cpu);
-	return cpum_cf_setup(cpu, PMC_RELEASE);
-}
-
-/* Return the maximum possible counter set size (in number of 8 byte counters)
- * depending on type and model number.
- */
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
-			   struct cpumf_ctr_info *info)
-{
-	size_t ctrset_size = 0;
-
-	switch (ctrset) {
-	case CPUMF_CTR_SET_BASIC:
-		if (info->cfvn >= 1)
-			ctrset_size = 6;
-		break;
-	case CPUMF_CTR_SET_USER:
-		if (info->cfvn == 1)
-			ctrset_size = 6;
-		else if (info->cfvn >= 3)
-			ctrset_size = 2;
-		break;
-	case CPUMF_CTR_SET_CRYPTO:
-		if (info->csvn >= 1 && info->csvn <= 5)
-			ctrset_size = 16;
-		else if (info->csvn == 6 || info->csvn == 7)
-			ctrset_size = 20;
-		break;
-	case CPUMF_CTR_SET_EXT:
-		if (info->csvn == 1)
-			ctrset_size = 32;
-		else if (info->csvn == 2)
-			ctrset_size = 48;
-		else if (info->csvn >= 3 && info->csvn <= 5)
-			ctrset_size = 128;
-		else if (info->csvn == 6 || info->csvn == 7)
-			ctrset_size = 160;
-		break;
-	case CPUMF_CTR_SET_MT_DIAG:
-		if (info->csvn > 3)
-			ctrset_size = 48;
-		break;
-	case CPUMF_CTR_SET_MAX:
-		break;
-	}
-
-	return ctrset_size;
-}
-
-static int __init cpum_cf_init(void)
-{
-	int rc;
-
-	if (!cpum_cf_avail())
-		return -ENODEV;
-
-	/* clear bit 15 of cr0 to unauthorize problem-state to
-	 * extract measurement counters */
-	ctl_clear_bit(0, 48);
-
-	/* register handler for measurement-alert interruptions */
-	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
-				   cpumf_measurement_alert);
-	if (rc) {
-		pr_err("Registering for CPU-measurement alerts "
-		       "failed with rc=%i\n", rc);
-		return rc;
-	}
-
-	rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
-				"perf/s390/cf:online",
-				cpum_cf_online_cpu, cpum_cf_offline_cpu);
-	if (!rc)
-		cpum_cf_initalized = true;
-
-	return rc;
-}
-early_initcall(cpum_cf_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 0d64aafd158f..7ace1f9e4ccf 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
 CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
 CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
-
 CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
 CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
 CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
@@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
 CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
 CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
 CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109);
 CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
@@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
 CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
 CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb);
+CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc);
+CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd);
+CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce);
+CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 
 static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
 	CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
 	NULL,
 };
 
-static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = {
 	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
 	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
 	CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
 	NULL,
 };
 
+static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY),
+	CPUMF_EVENT_PTR(cf_z17, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z17, DCW_REQ),
+	CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV),
+	CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER),
+	CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY),
+	CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY),
+	CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, ICW_REQ),
+	CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV),
+	CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP),
+	CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV),
+	CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT),
+	CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT),
+	CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE),
+	CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER),
+	CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER),
+	CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD),
+	CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD),
+	CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD),
+	CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD),
+	CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION),
+	CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS),
+	CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS),
+	CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES),
+	CPUMF_EVENT_PTR(cf_z17, SORTL),
+	CPUMF_EVENT_PTR(cf_z17, DFLT_CC),
+	CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK),
+	CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO),
+	CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,	/* end of list; used as "model" set for cases 0x9175 and 0x9176 */
+};
+
 /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
 
 static struct attribute_group cpumcf_pmu_events_group = {
@@ -855,16 +1012,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
 	}
 
 	/* Determine version specific crypto set */
-	switch (ci.csvn) {
-	case 1 ... 5:
+	csvn = none;
+	if (ci.csvn >= 1 && ci.csvn <= 5)
 		csvn = cpumcf_svn_12345_pmu_event_attr;
-		break;
-	case 6 ... 7:
-		csvn = cpumcf_svn_67_pmu_event_attr;
-		break;
-	default:
-		csvn = none;
-	}
+	else if (ci.csvn >= 6)
+		csvn = cpumcf_svn_678_pmu_event_attr;
 
 	/* Determine model-specific counter set(s) */
 	get_cpu_id(&cpu_id);
@@ -897,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
 	case 0x3932:
 		model = cpumcf_z16_pmu_event_attr;
 		break;
+	case 0x9175:
+	case 0x9176:
+		model = cpumcf_z17_pmu_event_attr;
+		break;
 	default:
 		model = none;
 		break;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 332a49965130..91469401f2c9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -22,6 +22,23 @@
 #include <asm/irq.h>
 #include <asm/debug.h>
 #include <asm/timex.h>
+#include <linux/io.h>
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE		0x0008	  /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config) /* zeroed in sfb_account_overflows() */
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc)		((hwc)->last_tag)   /* sdbt position kept by hw_perf_event_update() */
+#define SAMPL_RATE(hwc)		((hwc)->event_base) /* copied to lsctl.interval on PMU enable */
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base) /* holds PERF_CPUM_SF_*_MODE bits */
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
 /* Minimum number of sample-data-block-tables:
  * At least one table is required for the sampling buffer structure.
@@ -42,7 +59,7 @@
 #define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
 static inline int require_table_link(const void *sdbt)
 {
-	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+	return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
 }
 
 /* Minimum and maximum sampling buffer sizes:
@@ -99,15 +116,55 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
 /* Debug feature */
 static debug_info_t *sfdbg;
 
+/* Sampling control helper functions */
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+						unsigned long freq)
+{
+	return (USEC_PER_SEC / freq) * qsi->cpu_speed;	/* divides first: quotient truncated */
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+						unsigned long rate)
+{
+	return USEC_PER_SEC * qsi->cpu_speed / rate;	/* multiplies first, then divides */
+}
+
+/* Return pointer to the trailer entry, i.e. the last bytes of the page-sized block at @v */
+static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *)v;
+	ret += PAGE_SIZE;	/* void * arithmetic (GNU C), counted in bytes */
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return ret;
+}
+
+/*
+ * Return 1 if the entry in the sample data block table (sdbt) has its
+ * lowest bit set, which marks it as a link to the next sdbt, else 0
+ */
+static inline int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1UL ? 1 : 0;
+}
+
+/* Return virtual address of the sdbt that link entry @s refers to (bit 0 masked off) */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return phys_to_virt(*s & ~0x1UL);
+}
+
 /*
  * sf_disable() - Switch off sampling facility
  */
-static int sf_disable(void)
+static void sf_disable(void)
 {
 	struct hws_lsctl_request_block sreq;
 
 	memset(&sreq, 0, sizeof(sreq));
-	return lsctl(&sreq);
+	lsctl(&sreq);
 }
 
 /*
@@ -123,57 +180,44 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
  */
 static void free_sampling_buffer(struct sf_buffer *sfb)
 {
-	unsigned long *sdbt, *curr;
-
-	if (!sfb->sdbt)
-		return;
+	unsigned long *sdbt, *curr, *head;
 
 	sdbt = sfb->sdbt;
-	curr = sdbt;
-
+	if (!sdbt)
+		return;
+	sfb->sdbt = NULL;
 	/* Free the SDBT after all SDBs are processed... */
-	while (1) {
-		if (!*curr || !sdbt)
-			break;
-
-		/* Process table-link entries */
+	head = sdbt;
+	curr = sdbt;
+	do {
 		if (is_link_entry(curr)) {
+			/* Process table-link entries */
 			curr = get_next_sdbt(curr);
-			if (sdbt)
-				free_page((unsigned long) sdbt);
-
-			/* If the origin is reached, sampling buffer is freed */
-			if (curr == sfb->sdbt)
-				break;
-			else
-				sdbt = curr;
+			free_page((unsigned long)sdbt);
+			sdbt = curr;
 		} else {
 			/* Process SDB pointer */
-			if (*curr) {
-				free_page(*curr);
-				curr++;
-			}
+			free_page((unsigned long)phys_to_virt(*curr));
+			curr++;
 		}
-	}
-
-	debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
-			    (unsigned long)sfb->sdbt);
+	} while (curr != head);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
 static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
 {
-	unsigned long sdb, *trailer;
+	struct hws_trailer_entry *te;
+	unsigned long sdb;
 
 	/* Allocate and initialize sample-data-block */
 	sdb = get_zeroed_page(gfp_flags);
 	if (!sdb)
 		return -ENOMEM;
-	trailer = trailer_entry_ptr(sdb);
-	*trailer = SDB_TE_ALERT_REQ_MASK;
+	te = trailer_entry_ptr(sdb);
+	te->header.a = 1;
 
 	/* Link SDB into the sample-data-block-table */
-	*sdbt = sdb;
+	*sdbt = virt_to_phys((void *)sdb);
 
 	return 0;
 }
@@ -212,10 +256,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	 * the sampling buffer origin.
 	 */
 	if (sfb->sdbt != get_next_sdbt(tail)) {
-		debug_sprintf_event(sfdbg, 3, "%s: "
-				    "sampling buffer is not linked: origin %#lx"
-				    " tail %#lx\n", __func__,
-				    (unsigned long)sfb->sdbt,
+		debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+				    __func__, (unsigned long)sfb->sdbt,
 				    (unsigned long)tail);
 		return -EINVAL;
 	}
@@ -225,14 +267,14 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	for (i = 0; i < num_sdb; i++) {
 		/* Allocate a new SDB-table if it is full. */
 		if (require_table_link(tail)) {
-			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			new = (unsigned long *)get_zeroed_page(gfp_flags);
 			if (!new) {
 				rc = -ENOMEM;
 				break;
 			}
 			sfb->num_sdbt++;
 			/* Link current page to tail of chain */
-			*tail = (unsigned long)(void *) new + 1;
+			*tail = virt_to_phys((void *)new) + 1;
 			tail_prev = tail;
 			tail = new;
 		}
@@ -251,7 +293,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 			 */
 			if (tail_prev) {
 				sfb->num_sdbt--;
-				free_page((unsigned long) new);
+				free_page((unsigned long)new);
 				tail = tail_prev;
 			}
 			break;
@@ -262,12 +304,9 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	}
 
 	/* Link sampling buffer to its origin */
-	*tail = (unsigned long) sfb->sdbt + 1;
+	*tail = virt_to_phys(sfb->sdbt) + 1;
 	sfb->tail = tail;
 
-	debug_sprintf_event(sfdbg, 4, "%s: new buffer"
-			    " settings: sdbt %lu sdb %lu\n", __func__,
-			    sfb->num_sdbt, sfb->num_sdb);
 	return rc;
 }
 
@@ -290,7 +329,7 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 		return -EINVAL;
 
 	/* Allocate the sample-data-block-table origin */
-	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
 	if (!sfb->sdbt)
 		return -ENOMEM;
 	sfb->num_sdb = 0;
@@ -300,19 +339,12 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 	 * realloc_sampling_buffer() invocation.
 	 */
 	sfb->tail = sfb->sdbt;
-	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+	*sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1;
 
 	/* Allocate requested number of sample-data-blocks */
 	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
-	if (rc) {
+	if (rc)
 		free_sampling_buffer(sfb);
-		debug_sprintf_event(sfdbg, 4, "%s: "
-			"realloc_sampling_buffer failed with rc %i\n",
-			__func__, rc);
-	} else
-		debug_sprintf_event(sfdbg, 4,
-			"%s: tear %#lx dear %#lx\n", __func__,
-			(unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
 	return rc;
 }
 
@@ -324,8 +356,8 @@ static void sfb_set_limits(unsigned long min, unsigned long max)
 	CPUM_SF_MAX_SDB = max;
 
 	memset(&si, 0, sizeof(si));
-	if (!qsi(&si))
-		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+	qsi(&si);
+	CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
 }
 
 static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
@@ -344,12 +376,6 @@ static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
 	return 0;
 }
 
-static int sfb_has_pending_allocs(struct sf_buffer *sfb,
-				   struct hw_perf_event *hwc)
-{
-	return sfb_pending_allocs(sfb, hwc) > 0;
-}
-
 static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
 {
 	/* Limit the number of SDBs to not exceed the maximum */
@@ -366,14 +392,13 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
 
 static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
 {
-	if (cpuhw->sfb.sdbt)
+	if (sf_buffer_available(cpuhw))
 		free_sampling_buffer(&cpuhw->sfb);
 }
 
 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
 	unsigned long n_sdb, freq;
-	size_t sample_size;
 
 	/* Calculate sampling buffers using 4K pages
 	 *
@@ -404,7 +429,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
 	 *	 to 511 SDBs).
 	 */
-	sample_size = sizeof(struct hws_basic_entry);
 	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
 	n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
 
@@ -420,12 +444,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 	if (sf_buffer_available(cpuhw))
 		return 0;
 
-	debug_sprintf_event(sfdbg, 3,
-			    "%s: rate %lu f %lu sdb %lu/%lu"
-			    " sample_size %lu cpuhw %p\n", __func__,
-			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
-			    sample_size, cpuhw);
-
 	return alloc_sampling_buffer(&cpuhw->sfb,
 				     sfb_pending_allocs(&cpuhw->sfb, hwc));
 }
@@ -482,8 +500,6 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
 	if (num)
 		sfb_account_allocs(num, hwc);
 
-	debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
-			    __func__, OVERFLOW_REG(hwc), ratio, num);
 	OVERFLOW_REG(hwc) = 0;
 }
 
@@ -501,13 +517,11 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
 static void extend_sampling_buffer(struct sf_buffer *sfb,
 				   struct hw_perf_event *hwc)
 {
-	unsigned long num, num_old;
-	int rc;
+	unsigned long num;
 
 	num = sfb_pending_allocs(sfb, hwc);
 	if (!num)
 		return;
-	num_old = sfb->num_sdb;
 
 	/* Disable the sampling facility to reset any states and also
 	 * clear pending measurement alerts.
@@ -519,62 +533,32 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
 	 * called by perf.  Because this is a reallocation, it is fine if the
 	 * new SDB-request cannot be satisfied immediately.
 	 */
-	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
-	if (rc)
-		debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
-				    __func__, rc);
-
-	if (sfb_has_pending_allocs(sfb, hwc))
-		debug_sprintf_event(sfdbg, 5, "%s: "
-				    "req %lu alloc %lu remaining %lu\n",
-				    __func__, num, sfb->num_sdb - num_old,
-				    sfb_pending_allocs(sfb, hwc));
+	realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
 }
 
 /* Number of perf events counting hardware events */
-static atomic_t num_events;
+static refcount_t num_events;
 /* Used to avoid races in calling reserve/release_cpumf_hardware */
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 #define PMC_INIT      0
 #define PMC_RELEASE   1
-#define PMC_FAILURE   2
 static void setup_pmc_cpu(void *flags)
 {
-	int err;
-	struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
 
-	err = 0;
-	switch (*((int *) flags)) {
+	sf_disable();
+	switch (*((int *)flags)) {
 	case PMC_INIT:
-		memset(cpusf, 0, sizeof(*cpusf));
-		err = qsi(&cpusf->qsi);
-		if (err)
-			break;
-		cpusf->flags |= PMU_F_RESERVED;
-		err = sf_disable();
-		if (err)
-			pr_err("Switching off the sampling facility failed "
-			       "with rc %i\n", err);
-		debug_sprintf_event(sfdbg, 5,
-				    "%s: initialized: cpuhw %p\n", __func__,
-				    cpusf);
+		memset(cpuhw, 0, sizeof(*cpuhw));
+		qsi(&cpuhw->qsi);
+		cpuhw->flags |= PMU_F_RESERVED;
 		break;
 	case PMC_RELEASE:
-		cpusf->flags &= ~PMU_F_RESERVED;
-		err = sf_disable();
-		if (err) {
-			pr_err("Switching off the sampling facility failed "
-			       "with rc %i\n", err);
-		} else
-			deallocate_buffers(cpusf);
-		debug_sprintf_event(sfdbg, 5,
-				    "%s: released: cpuhw %p\n", __func__,
-				    cpusf);
+		cpuhw->flags &= ~PMU_F_RESERVED;
+		deallocate_buffers(cpuhw);
 		break;
 	}
-	if (err)
-		*((int *) flags) |= PMC_FAILURE;
 }
 
 static void release_pmc_hardware(void)
@@ -585,27 +569,19 @@ static void release_pmc_hardware(void)
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
 }
 
-static int reserve_pmc_hardware(void)
+static void reserve_pmc_hardware(void)
 {
 	int flags = PMC_INIT;
 
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
-	if (flags & PMC_FAILURE) {
-		release_pmc_hardware();
-		return -ENODEV;
-	}
 	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
-	return 0;
 }
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	/* Release PMC if this is the last perf event */
-	if (!atomic_add_unless(&num_events, -1, 1)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_dec_return(&num_events) == 0)
-			release_pmc_hardware();
+	if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+		release_pmc_hardware();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -671,7 +647,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
 	/* Protect callchain buffers, tasks */
 	rcu_read_lock();
 
-	perf_prepare_sample(&header, data, event, regs);
+	perf_prepare_sample(data, event, regs);
+	perf_prepare_header(&header, data, event, regs);
 	if (perf_output_begin(&handle, data, event, header.size))
 		goto out;
 
@@ -708,9 +685,6 @@ static unsigned long getrate(bool freq, unsigned long sample,
 		 */
 		if (sample_rate_to_freq(si, rate) >
 		    sysctl_perf_event_sample_rate) {
-			debug_sprintf_event(sfdbg, 1, "%s: "
-					    "Sampling rate exceeds maximum "
-					    "perf sample rate\n", __func__);
 			rate = 0;
 		}
 	}
@@ -755,9 +729,6 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
 	attr->sample_period = rate;
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
-	debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
-			    __func__, event->cpu, event->attr.sample_period,
-			    event->attr.freq, SAMPLE_FREQ_MODE(hwc));
 	return 0;
 }
 
@@ -767,23 +738,16 @@ static int __hw_perf_event_init(struct perf_event *event)
 	struct hws_qsi_info_block si;
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
-	int cpu, err;
+	int cpu, err = 0;
 
 	/* Reserve CPU-measurement sampling facility */
-	err = 0;
-	if (!atomic_inc_not_zero(&num_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
-			err = -EBUSY;
-		else
-			atomic_inc(&num_events);
-		mutex_unlock(&pmc_reserve_mutex);
+	mutex_lock(&pmc_reserve_mutex);
+	if (!refcount_inc_not_zero(&num_events)) {
+		reserve_pmc_hardware();
+		refcount_set(&num_events, 1);
 	}
 	event->destroy = hw_perf_event_destroy;
 
-	if (err)
-		goto out;
-
 	/* Access per-CPU sampling information (query sampling info) */
 	/*
 	 * The event->cpu value can be -1 to count on every CPU, for example,
@@ -795,9 +759,9 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	memset(&si, 0, sizeof(si));
 	cpuhw = NULL;
-	if (event->cpu == -1)
+	if (event->cpu == -1) {
 		qsi(&si);
-	else {
+	} else {
 		/* Event is pinned to a particular CPU, retrieve the per-CPU
 		 * sampling structure for accessing the CPU-specific QSI.
 		 */
@@ -834,21 +798,13 @@ static int __hw_perf_event_init(struct perf_event *event)
 		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
 	}
 
-	/* Check and set other sampling flags */
-	if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
-		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
-
 	err =  __hw_perf_event_init_rate(event, &si);
 	if (err)
 		goto out;
 
-	/* Initialize sample data overflow accounting */
-	hwc->extra_reg.reg = REG_OVERFLOW;
-	OVERFLOW_REG(hwc) = 0;
-
 	/* Use AUX buffer. No need to allocate it by ourself */
 	if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
-		return 0;
+		goto out;
 
 	/* Allocate the per-CPU sampling buffer using the CPU information
 	 * from the event.  If the event is not pinned to a particular
@@ -878,6 +834,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 		if (is_default_overflow_handler(event))
 			event->overflow_handler = cpumsf_output_event_pid;
 out:
+	mutex_unlock(&pmc_reserve_mutex);
 	return err;
 }
 
@@ -919,10 +876,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 		return -ENOENT;
 	}
 
-	/* Check online status of the CPU to which the event is pinned */
-	if (event->cpu >= 0 && !cpu_online(event->cpu))
-		return -ENODEV;
-
 	/* Force reset of idle/hv excludes regardless of what the
 	 * user requested.
 	 */
@@ -932,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 		event->attr.exclude_idle = 0;
 
 	err = __hw_perf_event_init(event);
-	if (unlikely(err))
-		if (event->destroy)
-			event->destroy(event);
 	return err;
 }
 
@@ -944,10 +894,14 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	struct hw_perf_event *hwc;
 	int err;
 
-	if (cpuhw->flags & PMU_F_ENABLED)
-		return;
-
-	if (cpuhw->flags & PMU_F_ERR_MASK)
+	/*
+	 * Event must be
+	 * - added/started on this CPU (PMU_F_IN_USE set)
+	 * - and CPU must be available (PMU_F_RESERVED set)
+	 * - and not already enabled (PMU_F_ENABLED not set)
+	 * - and not in error condition (PMU_F_ERR_MASK not set)
+	 */
+	if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED))
 		return;
 
 	/* Check whether to extent the sampling buffer.
@@ -961,40 +915,27 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	 * facility, but it can be fully re-enabled using sampling controls that
 	 * have been saved in cpumsf_pmu_disable().
 	 */
-	if (cpuhw->event) {
-		hwc = &cpuhw->event->hw;
-		if (!(SAMPL_DIAG_MODE(hwc))) {
-			/*
-			 * Account number of overflow-designated
-			 * buffer extents
-			 */
-			sfb_account_overflows(cpuhw, hwc);
-			extend_sampling_buffer(&cpuhw->sfb, hwc);
-		}
-		/* Rate may be adjusted with ioctl() */
-		cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+	hwc = &cpuhw->event->hw;
+	if (!(SAMPL_DIAG_MODE(hwc))) {
+		/*
+		 * Account number of overflow-designated buffer extents
+		 */
+		sfb_account_overflows(cpuhw, hwc);
+		extend_sampling_buffer(&cpuhw->sfb, hwc);
 	}
+	/* Rate may be adjusted with ioctl() */
+	cpuhw->lsctl.interval = SAMPL_RATE(hwc);
 
 	/* (Re)enable the PMU and sampling facility */
-	cpuhw->flags |= PMU_F_ENABLED;
-	barrier();
-
 	err = lsctl(&cpuhw->lsctl);
 	if (err) {
-		cpuhw->flags &= ~PMU_F_ENABLED;
-		pr_err("Loading sampling controls failed: op %i err %i\n",
-			1, err);
+		pr_err("Loading sampling controls failed: op 1 err %i\n", err);
 		return;
 	}
 
 	/* Load current program parameter */
-	lpp(&S390_lowcore.lpp);
-
-	debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
-			    "interval %#lx tear %#lx dear %#lx\n", __func__,
-			    cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
-			    cpuhw->lsctl.cd, cpuhw->lsctl.interval,
-			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+	lpp(&get_lowcore()->lpp);
+	cpuhw->flags |= PMU_F_ENABLED;
 }
 
 static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1017,31 +958,27 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 
 	err = lsctl(&inactive);
 	if (err) {
-		pr_err("Loading sampling controls failed: op %i err %i\n",
-			2, err);
+		pr_err("Loading sampling controls failed: op 2 err %i\n", err);
 		return;
 	}
 
-	/* Save state of TEAR and DEAR register contents */
-	err = qsi(&si);
-	if (!err) {
-		/* TEAR/DEAR values are valid only if the sampling facility is
-		 * enabled.  Note that cpumsf_pmu_disable() might be called even
-		 * for a disabled sampling facility because cpumsf_pmu_enable()
-		 * controls the enable/disable state.
-		 */
-		if (si.es) {
-			cpuhw->lsctl.tear = si.tear;
-			cpuhw->lsctl.dear = si.dear;
-		}
-	} else
-		debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
-				    __func__, err);
+	/*
+	 * Save state of TEAR and DEAR register contents.
+	 * TEAR/DEAR values are valid only if the sampling facility is
+	 * enabled.  Note that cpumsf_pmu_disable() might be called even
+	 * for a disabled sampling facility because cpumsf_pmu_enable()
+	 * controls the enable/disable state.
+	 */
+	qsi(&si);
+	if (si.es) {
+		cpuhw->lsctl.tear = si.tear;
+		cpuhw->lsctl.dear = si.dear;
+	}
 
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
 
-/* perf_exclude_event() - Filter event
+/* perf_event_exclude() - Filter event
  * @event:	The perf event
  * @regs:	pt_regs structure
  * @sde_regs:	Sample-data-entry (sde) regs structure
@@ -1050,7 +987,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
  *
  * Return non-zero if the event shall be excluded.
  */
-static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
 			      struct perf_sf_sde_regs *sde_regs)
 {
 	if (event->attr.exclude_user && user_mode(regs))
@@ -1133,12 +1070,9 @@ static int perf_push_sample(struct perf_event *event,
 	data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
 
 	overflow = 0;
-	if (perf_exclude_event(event, &regs, sde_regs))
+	if (perf_event_exclude(event, &regs, sde_regs))
 		goto out;
-	if (perf_event_overflow(event, &data, &regs)) {
-		overflow = 1;
-		event->pmu->stop(event, 0);
-	}
+	overflow = perf_event_overflow(event, &data, &regs);
 	perf_event_update_userpage(event);
 out:
 	return overflow;
@@ -1175,9 +1109,9 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 	struct hws_trailer_entry *te;
 	struct hws_basic_entry *sample;
 
-	te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
-	sample = (struct hws_basic_entry *) *sdbt;
-	while ((unsigned long *) sample < (unsigned long *) te) {
+	te = trailer_entry_ptr((unsigned long)sdbt);
+	sample = (struct hws_basic_entry *)sdbt;
+	while ((unsigned long *)sample < (unsigned long *)te) {
 		/* Check for an empty sample */
 		if (!sample->def || sample->LS)
 			break;
@@ -1202,11 +1136,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 				/* Count discarded samples */
 				*overflow += 1;
 		} else {
-			debug_sprintf_event(sfdbg, 4,
-					    "%s: Found unknown"
-					    " sampling data entry: te->f %i"
-					    " basic.def %#4x (%p)\n", __func__,
-					    te->f, sample->def, sample);
 			/* Sample slot is not yet written or other record.
 			 *
 			 * This condition can occur if the buffer was reused
@@ -1217,7 +1146,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
 			 * that are not full.  Stop processing if the first
 			 * invalid format was detected.
 			 */
-			if (!te->f)
+			if (!te->header.f)
 				break;
 		}
 
@@ -1235,71 +1164,62 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
  * The sampling buffer position are retrieved and saved in the TEAR_REG
  * register of the specified perf event.
  *
- * Only full sample-data-blocks are processed.	Specify the flash_all flag
- * to also walk through partially filled sample-data-blocks.  It is ignored
- * if PERF_CPUM_SF_FULL_BLOCKS is set.	The PERF_CPUM_SF_FULL_BLOCKS flag
- * enforces the processing of full sample-data-blocks only (trailer entries
- * with the block-full-indicator bit set).
+ * Only full sample-data-blocks are processed.	Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks.
  */
 static void hw_perf_event_update(struct perf_event *event, int flush_all)
 {
+	unsigned long long event_overflow, sampl_overflow, num_sdb;
 	struct hw_perf_event *hwc = &event->hw;
+	union hws_trailer_header prev, new;
 	struct hws_trailer_entry *te;
-	unsigned long *sdbt;
-	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+	unsigned long *sdbt, sdb;
 	int done;
 
 	/*
 	 * AUX buffer is used when in diagnostic sampling mode.
 	 * No perf events/samples are created.
 	 */
-	if (SAMPL_DIAG_MODE(&event->hw))
+	if (SAMPL_DIAG_MODE(hwc))
 		return;
 
-	if (flush_all && SDB_FULL_BLOCKS(hwc))
-		flush_all = 0;
-
-	sdbt = (unsigned long *) TEAR_REG(hwc);
+	sdbt = (unsigned long *)TEAR_REG(hwc);
 	done = event_overflow = sampl_overflow = num_sdb = 0;
 	while (!done) {
 		/* Get the trailer entry of the sample-data-block */
-		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+		sdb = (unsigned long)phys_to_virt(*sdbt);
+		te = trailer_entry_ptr(sdb);
 
 		/* Leave loop if no more work to do (block full indicator) */
-		if (!te->f) {
+		if (!te->header.f) {
 			done = 1;
 			if (!flush_all)
 				break;
 		}
 
 		/* Check the sample overflow count */
-		if (te->overflow)
+		if (te->header.overflow)
 			/* Account sample overflows and, if a particular limit
 			 * is reached, extend the sampling buffer.
 			 * For details, see sfb_account_overflows().
 			 */
-			sampl_overflow += te->overflow;
-
-		/* Timestamps are valid for full sample-data-blocks only */
-		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
-				    "overflow %llu timestamp %#llx\n",
-				    __func__, (unsigned long)sdbt, te->overflow,
-				    (te->f) ? trailer_timestamp(te) : 0ULL);
+			sampl_overflow += te->header.overflow;
 
 		/* Collect all samples from a single sample-data-block and
 		 * flag if an (perf) event overflow happened.  If so, the PMU
 		 * is stopped and remaining samples will be discarded.
 		 */
-		hw_collect_samples(event, sdbt, &event_overflow);
+		hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
 		num_sdb++;
 
 		/* Reset trailer (using compare-double-and-swap) */
+		prev.val = READ_ONCE_ALIGNED_128(te->header.val);
 		do {
-			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
-			te_flags |= SDB_TE_ALERT_REQ_MASK;
-		} while (!cmpxchg_double(&te->flags, &te->overflow,
-					 te->flags, te->overflow,
-					 te_flags, 0ULL));
+			new.val = prev.val;
+			new.f = 0;
+			new.a = 1;
+			new.overflow = 0;
+		} while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
 
 		/* Advance to next sample-data-block */
 		sdbt++;
@@ -1307,7 +1227,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			sdbt = get_next_sdbt(sdbt);
 
 		/* Update event hardware registers */
-		TEAR_REG(hwc) = (unsigned long) sdbt;
+		TEAR_REG(hwc) = (unsigned long)sdbt;
 
 		/* Stop processing sample-data if all samples of the current
 		 * sample-data-block were flushed even if it was not full.
@@ -1329,25 +1249,30 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	 * are dropped.
 	 * Slightly increase the interval to avoid hitting this limit.
 	 */
-	if (event_overflow) {
+	if (event_overflow)
 		SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
-		debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
-				    __func__,
-				    DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
-	}
+}
+
+static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
+					  unsigned long i)
+{
+	return i % aux->sfb.num_sdb;	/* wrap @i into [0, num_sdb) */
+}
+
+static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end)
+{
+	return end >= start ? end - start + 1 : 0;	/* inclusive count, 0 if end < start */
+}
 
-	if (sampl_overflow || event_overflow)
-		debug_sprintf_event(sfdbg, 4, "%s: "
-				    "overflows: sample %llu event %llu"
-				    " total %llu num_sdb %llu\n",
-				    __func__, sampl_overflow, event_overflow,
-				    OVERFLOW_REG(hwc), num_sdb);
+static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux)
+{
+	return aux_sdb_num(aux->head, aux->alert_mark);	/* head through alert_mark */
 }
 
-#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
-#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0)
-#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark)
-#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark)
+static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux)
+{
+	return aux_sdb_num(aux->head, aux->empty_mark);	/* head through empty_mark */
+}
 
 /*
  * Get trailer entry by index of SDB.
@@ -1357,9 +1282,9 @@ static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
 {
 	unsigned long sdb;
 
-	index = AUX_SDB_INDEX(aux, index);
+	index = aux_sdb_index(aux, index);
 	sdb = aux->sdb_index[index];
-	return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+	return trailer_entry_ptr(sdb);
 }
 
 /*
@@ -1381,10 +1306,10 @@ static void aux_output_end(struct perf_output_handle *handle)
 	if (!aux)
 		return;
 
-	range_scan = AUX_SDB_NUM_ALERT(aux);
+	range_scan = aux_sdb_num_alert(aux);
 	for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
 		te = aux_sdb_trailer(aux, idx);
-		if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
+		if (!te->header.f)
 			break;
 	}
 	/* i is num of SDBs which are full */
@@ -1392,10 +1317,7 @@ static void aux_output_end(struct perf_output_handle *handle)
 
 	/* Remove alert indicators in the buffer */
 	te = aux_sdb_trailer(aux, aux->alert_mark);
-	te->flags &= ~SDB_TE_ALERT_REQ_MASK;
-
-	debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
-			    __func__, i, range_scan, aux->head);
+	te->header.a = 0;
 }
 
 /*
@@ -1411,12 +1333,10 @@ static int aux_output_begin(struct perf_output_handle *handle,
 			    struct aux_buffer *aux,
 			    struct cpu_hw_sf *cpuhw)
 {
-	unsigned long range;
-	unsigned long i, range_scan, idx;
-	unsigned long head, base, offset;
+	unsigned long range, i, range_scan, idx, head, base, offset;
 	struct hws_trailer_entry *te;
 
-	if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+	if (handle->head & ~PAGE_MASK)
 		return -EINVAL;
 
 	aux->head = handle->head >> PAGE_SHIFT;
@@ -1428,18 +1348,14 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	 * SDBs between aux->head and aux->empty_mark are already ready
 	 * for new data. range_scan is num of SDBs not within them.
 	 */
-	debug_sprintf_event(sfdbg, 6,
-			    "%s: range %ld head %ld alert %ld empty %ld\n",
-			    __func__, range, aux->head, aux->alert_mark,
-			    aux->empty_mark);
-	if (range > AUX_SDB_NUM_EMPTY(aux)) {
-		range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+	if (range > aux_sdb_num_empty(aux)) {
+		range_scan = range - aux_sdb_num_empty(aux);
 		idx = aux->empty_mark + 1;
 		for (i = 0; i < range_scan; i++, idx++) {
 			te = aux_sdb_trailer(aux, idx);
-			te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
-				       SDB_TE_ALERT_REQ_MASK);
-			te->overflow = 0;
+			te->header.f = 0;
+			te->header.a = 0;
+			te->header.overflow = 0;
 		}
 		/* Save the position of empty SDBs */
 		aux->empty_mark = aux->head + range - 1;
@@ -1448,20 +1364,14 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	/* Set alert indicator */
 	aux->alert_mark = aux->head + range/2 - 1;
 	te = aux_sdb_trailer(aux, aux->alert_mark);
-	te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
+	te->header.a = 1;
 
 	/* Reset hardware buffer head */
-	head = AUX_SDB_INDEX(aux, aux->head);
+	head = aux_sdb_index(aux, aux->head);
 	base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
 	offset = head % CPUM_SF_SDB_PER_TABLE;
-	cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
-	cpuhw->lsctl.dear = aux->sdb_index[head];
-
-	debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
-			    "index %ld tear %#lx dear %#lx\n", __func__,
-			    aux->head, aux->alert_mark, aux->empty_mark,
-			    head / CPUM_SF_SDB_PER_TABLE,
-			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+	cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
+	cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
 
 	return 0;
 }
@@ -1475,14 +1385,15 @@ static int aux_output_begin(struct perf_output_handle *handle,
 static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
 			  unsigned long long *overflow)
 {
-	unsigned long long orig_overflow, orig_flags, new_flags;
+	union hws_trailer_header prev, new;
 	struct hws_trailer_entry *te;
 
 	te = aux_sdb_trailer(aux, alert_index);
+	prev.val = READ_ONCE_ALIGNED_128(te->header.val);
 	do {
-		orig_flags = te->flags;
-		*overflow = orig_overflow = te->overflow;
-		if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
+		new.val = prev.val;
+		*overflow = prev.overflow;
+		if (prev.f) {
 			/*
 			 * SDB is already set by hardware.
 			 * Abort and try to set somewhere
@@ -1490,10 +1401,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
 			 */
 			return false;
 		}
-		new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
-	} while (!cmpxchg_double(&te->flags, &te->overflow,
-				 orig_flags, orig_overflow,
-				 new_flags, 0ULL));
+		new.a = 1;
+		new.overflow = 0;
+	} while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
 	return true;
 }
 
@@ -1522,14 +1432,12 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
 static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 			     unsigned long long *overflow)
 {
-	unsigned long long orig_overflow, orig_flags, new_flags;
-	unsigned long i, range_scan, idx, idx_old;
+	union hws_trailer_header prev, new;
+	unsigned long i, range_scan, idx;
+	unsigned long long orig_overflow;
 	struct hws_trailer_entry *te;
 
-	debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
-			    "empty %ld\n", __func__, range, aux->head,
-			    aux->alert_mark, aux->empty_mark);
-	if (range <= AUX_SDB_NUM_EMPTY(aux))
+	if (range <= aux_sdb_num_empty(aux))
 		/*
 		 * No need to scan. All SDBs in range are marked as empty.
 		 * Just set alert indicator. Should check race with hardware
@@ -1550,30 +1458,27 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
 	 * Start scanning from one SDB behind empty_mark. If the new alert
 	 * indicator fall into this range, set it.
 	 */
-	range_scan = range - AUX_SDB_NUM_EMPTY(aux);
-	idx_old = idx = aux->empty_mark + 1;
+	range_scan = range - aux_sdb_num_empty(aux);
+	idx = aux->empty_mark + 1;
 	for (i = 0; i < range_scan; i++, idx++) {
 		te = aux_sdb_trailer(aux, idx);
+		prev.val = READ_ONCE_ALIGNED_128(te->header.val);
 		do {
-			orig_flags = te->flags;
-			orig_overflow = te->overflow;
-			new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
+			new.val = prev.val;
+			orig_overflow = prev.overflow;
+			new.f = 0;
+			new.overflow = 0;
 			if (idx == aux->alert_mark)
-				new_flags |= SDB_TE_ALERT_REQ_MASK;
+				new.a = 1;
 			else
-				new_flags &= ~SDB_TE_ALERT_REQ_MASK;
-		} while (!cmpxchg_double(&te->flags, &te->overflow,
-					 orig_flags, orig_overflow,
-					 new_flags, 0ULL));
+				new.a = 0;
+		} while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
 		*overflow += orig_overflow;
 	}
 
 	/* Update empty_mark to new position */
 	aux->empty_mark = aux->head + range - 1;
 
-	debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
-			    "empty %ld\n", __func__, range_scan, idx_old,
-			    idx - 1, aux->empty_mark);
 	return true;
 }
 
@@ -1590,12 +1495,12 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 	unsigned long num_sdb;
 
 	aux = perf_get_aux(handle);
-	if (WARN_ON_ONCE(!aux))
+	if (!aux)
 		return;
 
 	/* Inform user space new data arrived */
-	size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
-	debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+	size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
+	debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
 			    size >> PAGE_SHIFT);
 	perf_aux_output_end(handle, size);
 
@@ -1607,12 +1512,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			pr_err("The AUX buffer with %lu pages for the "
 			       "diagnostic-sampling mode is full\n",
 				num_sdb);
-			debug_sprintf_event(sfdbg, 1,
-					    "%s: AUX buffer used up\n",
-					    __func__);
 			break;
 		}
-		if (WARN_ON_ONCE(!aux))
+		if (!aux)
 			return;
 
 		/* Update head and alert_mark to new position */
@@ -1632,23 +1534,11 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			perf_aux_output_end(&cpuhw->handle, size);
 			pr_err("Sample data caused the AUX buffer with %lu "
 			       "pages to overflow\n", aux->sfb.num_sdb);
-			debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
-					    "overflow %lld\n", __func__,
-					    aux->head, range, overflow);
 		} else {
-			size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+			size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
-			debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
-					    "already full, try another\n",
-					    __func__,
-					    aux->head, aux->alert_mark);
 		}
 	}
-
-	if (done)
-		debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
-				    "empty %ld\n", __func__, aux->head,
-				    aux->alert_mark, aux->empty_mark);
 }
 
 /*
@@ -1670,15 +1560,13 @@ static void aux_buffer_free(void *data)
 	kfree(aux->sdbt_index);
 	kfree(aux->sdb_index);
 	kfree(aux);
-
-	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
 }
 
 static void aux_sdb_init(unsigned long sdb)
 {
 	struct hws_trailer_entry *te;
 
-	te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+	te = trailer_entry_ptr(sdb);
 
 	/* Save clock base */
 	te->clock_base = 1;
@@ -1741,7 +1629,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 
 	/* Allocate the first SDBT */
 	sfb->num_sdbt = 0;
-	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
 	if (!sfb->sdbt)
 		goto no_sdbt;
 	aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
@@ -1753,23 +1641,23 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 	 */
 	for (i = 0; i < nr_pages; i++, tail++) {
 		if (require_table_link(tail)) {
-			new = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+			new = (unsigned long *)get_zeroed_page(GFP_KERNEL);
 			if (!new)
 				goto no_sdbt;
 			aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
 			/* Link current page to tail of chain */
-			*tail = (unsigned long)(void *) new + 1;
+			*tail = virt_to_phys(new) + 1;
 			tail = new;
 		}
 		/* Tail is the entry in a SDBT */
-		*tail = (unsigned long)pages[i];
+		*tail = virt_to_phys(pages[i]);
 		aux->sdb_index[i] = (unsigned long)pages[i];
 		aux_sdb_init((unsigned long)pages[i]);
 	}
 	sfb->num_sdb = nr_pages;
 
 	/* Link the last entry in the SDBT to the first SDBT */
-	*tail = (unsigned long) sfb->sdbt + 1;
+	*tail = virt_to_phys(sfb->sdbt) + 1;
 	sfb->tail = tail;
 
 	/*
@@ -1779,9 +1667,6 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 	 */
 	aux->empty_mark = sfb->num_sdb - 1;
 
-	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
-			    sfb->num_sdbt, sfb->num_sdb);
-
 	return aux;
 
 no_sdbt:
@@ -1802,7 +1687,7 @@ static void cpumsf_pmu_read(struct perf_event *event)
 	/* Nothing to do ... updates are interrupt-driven */
 }
 
-/* Check if the new sampling period/freqeuncy is appropriate.
+/* Check if the new sampling period/frequency is appropriate.
  *
  * Return non-zero on error and zero on passed checks.
  */
@@ -1814,8 +1699,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 
 	memset(&si, 0, sizeof(si));
 	if (event->cpu == -1) {
-		if (qsi(&si))
-			return -ENODEV;
+		qsi(&si);
 	} else {
 		/* Event is pinned to a particular CPU, retrieve the per-CPU
 		 * sampling structure for accessing the CPU-specific QSI.
@@ -1825,7 +1709,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 		si = cpuhw->qsi;
 	}
 
-	do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+	do_freq = !!SAMPL_FREQ_MODE(&event->hw);
 	rate = getrate(do_freq, value, &si);
 	if (!rate)
 		return -EINVAL;
@@ -1833,10 +1717,6 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 	event->attr.sample_period = rate;
 	SAMPL_RATE(&event->hw) = rate;
 	hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
-	debug_sprintf_event(sfdbg, 4, "%s:"
-			    " cpu %d value %#llx period %#llx freq %d\n",
-			    __func__, event->cpu, value,
-			    event->attr.sample_period, do_freq);
 	return 0;
 }
 
@@ -1847,12 +1727,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
 {
 	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
 
-	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+	if (!(event->hw.state & PERF_HES_STOPPED))
 		return;
-
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
 	perf_pmu_disable(event->pmu);
 	event->hw.state = 0;
 	cpuhw->lsctl.cs = 1;
@@ -1877,7 +1753,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
 	event->hw.state |= PERF_HES_STOPPED;
 
 	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
-		hw_perf_event_update(event, 1);
+		/* CPU hotplug off removes SDBs. No samples to extract. */
+		if (cpuhw->flags & PMU_F_RESERVED)
+			hw_perf_event_update(event, 1);
 		event->hw.state |= PERF_HES_UPTODATE;
 	}
 	perf_pmu_enable(event->pmu);
@@ -1887,15 +1765,14 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 {
 	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
 	struct aux_buffer *aux;
-	int err;
+	int err = 0;
 
 	if (cpuhw->flags & PMU_F_IN_USE)
 		return -EAGAIN;
 
-	if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
+	if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw))
 		return -EINVAL;
 
-	err = 0;
 	perf_pmu_disable(event->pmu);
 
 	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -1909,9 +1786,9 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	cpuhw->lsctl.h = 1;
 	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
 	if (!SAMPL_DIAG_MODE(&event->hw)) {
-		cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
-		cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
-		TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
+		cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt);
+		cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt;
+		TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt;
 	}
 
 	/* Ensure sampling functions are in the disabled state.  If disabled,
@@ -2055,18 +1932,17 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 
 	/* Program alert request */
 	if (alert & CPU_MF_INT_SF_PRA) {
-		if (cpuhw->flags & PMU_F_IN_USE)
+		if (cpuhw->flags & PMU_F_IN_USE) {
 			if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
 				hw_collect_aux(cpuhw);
 			else
 				hw_perf_event_update(cpuhw->event, 0);
-		else
-			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+		}
 	}
 
 	/* Report measurement alerts only for non-PRA codes */
 	if (alert != CPU_MF_INT_SF_PRA)
-		debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+		debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
 				    alert);
 
 	/* Sampling authorization change request */
@@ -2082,7 +1958,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 
 	/* Invalid sampling buffer entry */
 	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
-		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+		pr_err("A sampling buffer entry is incorrect (alert=%#x)\n",
 		       alert);
 		cpuhw->flags |= PMU_F_ERR_IBE;
 		sf_disable();
@@ -2094,7 +1970,7 @@ static int cpusf_pmu_setup(unsigned int cpu, int flags)
 	/* Ignore the notification if no events are scheduled on the PMU.
 	 * This might be racy...
 	 */
-	if (!atomic_read(&num_events))
+	if (!refcount_read(&num_events))
 		return 0;
 
 	local_irq_disable();
@@ -2156,10 +2032,12 @@ static const struct kernel_param_ops param_ops_sfb_size = {
 	.get = param_get_sfb_size,
 };
 
-#define RS_INIT_FAILURE_QSI	  0x0001
-#define RS_INIT_FAILURE_BSDES	  0x0002
-#define RS_INIT_FAILURE_ALRT	  0x0003
-#define RS_INIT_FAILURE_PERF	  0x0004
+enum {
+	RS_INIT_FAILURE_BSDES	= 2,	/* Bad basic sampling size */
+	RS_INIT_FAILURE_ALRT	= 3,	/* IRQ registration failure */
+	RS_INIT_FAILURE_PERF	= 4	/* PMU registration failure */
+};
+
 static void __init pr_cpumsf_err(unsigned int reason)
 {
 	pr_err("Sampling facility support for perf is not available: "
@@ -2175,11 +2053,7 @@ static int __init init_cpum_sampling_pmu(void)
 		return -ENODEV;
 
 	memset(&si, 0, sizeof(si));
-	if (qsi(&si)) {
-		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
-		return -ENODEV;
-	}
-
+	qsi(&si);
 	if (!si.as && !si.ad)
 		return -ENODEV;
 
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index c27321cb0969..2b9611c4718e 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -15,7 +15,10 @@
 #include <linux/export.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
 #include <linux/sysfs.h>
+#include <asm/stacktrace.h>
 #include <asm/irq.h>
 #include <asm/cpu_mf.h>
 #include <asm/lowcore.h>
@@ -54,7 +57,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs)
 	return sie_block(regs)->gpsw.addr;
 }
 
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
 {
 	return is_in_guest(regs) ? instruction_pointer_guest(regs)
 				 : instruction_pointer(regs);
@@ -81,7 +84,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
 	return flags;
 }
 
-unsigned long perf_misc_flags(struct pt_regs *regs)
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
 {
 	/* Check if the cpum_sf PMU has created the pt_regs structure.
 	 * In this case, perf misc flags can be easily extracted.  Otherwise,
@@ -212,6 +215,12 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 	}
 }
 
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+			 struct pt_regs *regs)
+{
+	arch_stack_walk_user_common(NULL, NULL, entry, regs, true);
+}
+
 /* Perf definitions for PMU event attributes in sysfs */
 ssize_t cpumf_events_sysfs_show(struct device *dev,
 				struct device_attribute *attr, char *page)
@@ -219,5 +228,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
 	struct perf_pmu_events_attr *pmu_attr;
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id);
 }
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index b38b4ae01589..63875270941b 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -16,8 +16,7 @@
 #include <linux/export.h>
 #include <linux/io.h>
 #include <linux/perf_event.h>
-
-#include <asm/ctl_reg.h>
+#include <asm/ctlreg.h>
 #include <asm/pai.h>
 #include <asm/debug.h>
 
@@ -35,13 +34,49 @@ struct pai_userdata {
 struct paicrypt_map {
 	unsigned long *page;		/* Page for CPU to store counters */
 	struct pai_userdata *save;	/* Page to store no-zero counters */
-	unsigned int users;		/* # of PAI crypto users */
-	unsigned int sampler;		/* # of PAI crypto samplers */
-	unsigned int counter;		/* # of PAI crypto counters */
+	unsigned int active_events;	/* # of PAI crypto users */
+	refcount_t refcnt;		/* Reference count mapped buffers */
 	struct perf_event *event;	/* Perf event for sampling */
+	struct list_head syswide_list;	/* List system-wide sampling events */
+};
+
+struct paicrypt_mapptr {
+	struct paicrypt_map *mapptr;
 };
 
-static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+static struct paicrypt_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct paicrypt_mapptr __percpu *mapptr;
+} paicrypt_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paicrypt_root_free(void)
+{
+	if (refcount_dec_and_test(&paicrypt_root.refcnt)) {
+		free_percpu(paicrypt_root.mapptr);
+		paicrypt_root.mapptr = NULL;
+	}
+	debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__,
+			    refcount_read(&paicrypt_root.refcnt));
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int paicrypt_root_alloc(void)
+{
+	if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) {
+		/* The memory is already zeroed. */
+		paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr);
+		if (!paicrypt_root.mapptr)
+			return -ENOMEM;
+		refcount_set(&paicrypt_root.refcnt, 1);
+	}
+	return 0;
+}
 
 /* Release the PMU if event is the last perf event */
 static DEFINE_MUTEX(pai_reserve_mutex);
@@ -49,38 +84,51 @@ static DEFINE_MUTEX(pai_reserve_mutex);
 /* Adjust usage counters and remove allocated memory when all users are
  * gone.
  */
-static void paicrypt_event_destroy(struct perf_event *event)
+static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu)
 {
-	struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+	struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
+	struct paicrypt_map *cpump = mp->mapptr;
 
-	cpump->event = NULL;
-	static_branch_dec(&pai_key);
 	mutex_lock(&pai_reserve_mutex);
-	if (event->attr.sample_period)
-		cpump->sampler -= 1;
-	else
-		cpump->counter -= 1;
-	debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d"
-			    " sampler %d counter %d\n", __func__,
-			    event->attr.config, event->cpu, cpump->sampler,
-			    cpump->counter);
-	if (!cpump->counter && !cpump->sampler) {
+	debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d "
+			    "refcnt %u\n", __func__, event->attr.config,
+			    event->cpu, cpump->active_events,
+			    refcount_read(&cpump->refcnt));
+	if (refcount_dec_and_test(&cpump->refcnt)) {
 		debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
 				    __func__, (unsigned long)cpump->page,
 				    cpump->save);
 		free_page((unsigned long)cpump->page);
-		cpump->page = NULL;
 		kvfree(cpump->save);
-		cpump->save = NULL;
+		kfree(cpump);
+		mp->mapptr = NULL;
 	}
+	paicrypt_root_free();
 	mutex_unlock(&pai_reserve_mutex);
 }
 
-static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+	int cpu;
+
+	static_branch_dec(&pai_key);
+	free_page(PAI_SAVE_AREA(event));
+	if (event->cpu == -1) {
+		struct cpumask *mask = PAI_CPU_MASK(event);
+
+		for_each_cpu(cpu, mask)
+			paicrypt_event_destroy_cpu(event, cpu);
+		kfree(mask);
+	} else {
+		paicrypt_event_destroy_cpu(event, event->cpu);
+	}
+}
+
+static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
 {
 	if (kernel)
 		nr += PAI_CRYPTO_MAXCTR;
-	return cpump->page[nr];
+	return page[nr];
 }
 
 /* Read the counter values. Return value from location in CMP. For event
@@ -88,18 +136,19 @@ static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
  */
 static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
 {
-	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+	struct paicrypt_map *cpump = mp->mapptr;
 	u64 sum = 0;
 	int i;
 
 	if (event->attr.config != PAI_CRYPTO_BASE) {
-		return paicrypt_getctr(cpump,
+		return paicrypt_getctr(cpump->page,
 				       event->attr.config - PAI_CRYPTO_BASE,
 				       kernel);
 	}
 
 	for (i = 1; i <= paicrypt_cnt; i++) {
-		u64 val = paicrypt_getctr(cpump, i, kernel);
+		u64 val = paicrypt_getctr(cpump->page, i, kernel);
 
 		if (!val)
 			continue;
@@ -120,66 +169,110 @@ static u64 paicrypt_getall(struct perf_event *event)
 	return sum;
 }
 
-/* Used to avoid races in checking concurrent access of counting and
- * sampling for crypto events
- *
- * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
+/* Check concurrent access of counting and sampling for crypto events.
  * This function is called in process context and it is save to block.
  * When the event initialization functions fails, no other call back will
  * be invoked.
  *
  * Allocate the memory for the event.
  */
-static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu)
 {
-	unsigned int *use_ptr;
-	int rc = 0;
+	struct paicrypt_map *cpump = NULL;
+	struct paicrypt_mapptr *mp;
+	int rc;
 
 	mutex_lock(&pai_reserve_mutex);
-	if (a->sample_period) {		/* Sampling requested */
-		use_ptr = &cpump->sampler;
-		if (cpump->counter || cpump->sampler)
-			rc = -EBUSY;	/* ... sampling/counting active */
-	} else {			/* Counting requested */
-		use_ptr = &cpump->counter;
-		if (cpump->sampler)
-			rc = -EBUSY;	/* ... and sampling active */
-	}
+
+	/* Allocate root node */
+	rc = paicrypt_root_alloc();
 	if (rc)
 		goto unlock;
 
+	/* Allocate node for this event */
+	mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
+	cpump = mp->mapptr;
+	if (!cpump) {			/* Paicrypt_map allocated? */
+		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+		if (!cpump) {
+			rc = -ENOMEM;
+			goto free_root;
+		}
+		INIT_LIST_HEAD(&cpump->syswide_list);
+	}
+
 	/* Allocate memory for counter page and counter extraction.
 	 * Only the first counting event has to allocate a page.
 	 */
-	if (cpump->page)
+	if (cpump->page) {
+		refcount_inc(&cpump->refcnt);
 		goto unlock;
+	}
 
 	rc = -ENOMEM;
 	cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
 	if (!cpump->page)
-		goto unlock;
+		goto free_paicrypt_map;
 	cpump->save = kvmalloc_array(paicrypt_cnt + 1,
 				     sizeof(struct pai_userdata), GFP_KERNEL);
 	if (!cpump->save) {
 		free_page((unsigned long)cpump->page);
 		cpump->page = NULL;
-		goto unlock;
+		goto free_paicrypt_map;
 	}
-	rc = 0;
 
-unlock:
-	/* If rc is non-zero, do not increment counter/sampler. */
-	if (!rc)
-		*use_ptr += 1;
-	debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx sampler %d"
-			    " counter %d page %#lx save %p rc %d\n", __func__,
-			    a->sample_period, cpump->sampler, cpump->counter,
+	/* Set reference count and install the map for this CPU */
+	rc = 0;
+	refcount_set(&cpump->refcnt, 1);
+	mp->mapptr = cpump;
+	debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx "
+			    "save %p rc %d\n", __func__, cpump->active_events,
+			    refcount_read(&cpump->refcnt),
 			    (unsigned long)cpump->page, cpump->save, rc);
+	goto unlock;
+
+free_paicrypt_map:
+	/* Undo memory allocation */
+	kfree(cpump);
+	mp->mapptr = NULL;
+free_root:
+	paicrypt_root_free();
+unlock:
 	mutex_unlock(&pai_reserve_mutex);
+	return rc ? ERR_PTR(rc) : cpump;
+}
+
+static int paicrypt_event_init_all(struct perf_event *event)
+{
+	struct paicrypt_map *cpump;
+	struct cpumask *maskptr;
+	int cpu, rc = -ENOMEM;
+
+	maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+	if (!maskptr)
+		goto out;
+
+	for_each_online_cpu(cpu) {
+		cpump = paicrypt_busy(event, cpu);
+		if (IS_ERR(cpump)) {
+			for_each_cpu(cpu, maskptr)
+				paicrypt_event_destroy_cpu(event, cpu);
+			kfree(maskptr);
+			rc = PTR_ERR(cpump);
+			goto out;
+		}
+		cpumask_set_cpu(cpu, maskptr);
+	}
+
+	/*
+	 * On error the cpumask has been freed and all per-CPU data set up so
+	 * far has been released. On success remember for which CPUs data
+	 * structures have been allocated; they are released again in the
+	 * paicrypt_event_destroy() callback of this event.
+	 */
+	PAI_CPU_MASK(event) = maskptr;
+	rc = 0;
+out:
 	return rc;
 }
 
@@ -188,7 +281,7 @@ static int paicrypt_event_init(struct perf_event *event)
 {
 	struct perf_event_attr *a = &event->attr;
 	struct paicrypt_map *cpump;
-	int rc;
+	int rc = 0;
 
 	/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
 	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
@@ -197,25 +290,29 @@ static int paicrypt_event_init(struct perf_event *event)
 	if (a->config < PAI_CRYPTO_BASE ||
 	    a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
 		return -EINVAL;
-	/* Allow only CPU wide operation, no process context for now. */
-	if (event->hw.target || event->cpu == -1)
-		return -ENOENT;
-	/* Allow only CRYPTO_ALL for sampling. */
+	/* Allow only CRYPTO_ALL for sampling */
 	if (a->sample_period && a->config != PAI_CRYPTO_BASE)
 		return -EINVAL;
+	/* Get a page to store last counter values for sampling */
+	if (a->sample_period) {
+		PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+		if (!PAI_SAVE_AREA(event)) {
+			rc = -ENOMEM;
+			goto out;
+		}
+	}
 
-	cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
-	rc = paicrypt_busy(a, cpump);
-	if (rc)
-		return rc;
-
-	/* Event initialization sets last_tag to 0. When later on the events
-	 * are deleted and re-added, do not reset the event count value to zero.
-	 * Events are added, deleted and re-added when 2 or more events
-	 * are active at the same time.
-	 */
-	event->hw.last_tag = 0;
-	cpump->event = event;
+	if (event->cpu >= 0) {
+		cpump = paicrypt_busy(event, event->cpu);
+		if (IS_ERR(cpump))
+			rc = PTR_ERR(cpump);
+	} else {
+		rc = paicrypt_event_init_all(event);
+	}
+	if (rc) {
+		free_page(PAI_SAVE_AREA(event));
+		goto out;
+	}
 	event->destroy = paicrypt_event_destroy;
 
 	if (a->sample_period) {
@@ -230,7 +327,8 @@ static int paicrypt_event_init(struct perf_event *event)
 	}
 
 	static_branch_inc(&pai_key);
-	return 0;
+out:
+	return rc;
 }
 
 static void paicrypt_read(struct perf_event *event)
@@ -247,76 +345,102 @@ static void paicrypt_read(struct perf_event *event)
 
 static void paicrypt_start(struct perf_event *event, int flags)
 {
+	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+	struct paicrypt_map *cpump = mp->mapptr;
 	u64 sum;
 
-	if (!event->hw.last_tag) {
-		event->hw.last_tag = 1;
-		sum = paicrypt_getall(event);		/* Get current value */
-		local64_set(&event->count, 0);
+	if (!event->attr.sample_period) {	/* Counting */
+		sum = paicrypt_getall(event);	/* Get current value */
 		local64_set(&event->hw.prev_count, sum);
+	} else {				/* Sampling */
+		memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
+		/* Enable context switch callback for system-wide sampling */
+		if (!(event->attach_state & PERF_ATTACH_TASK)) {
+			list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+			perf_sched_cb_inc(event->pmu);
+		} else {
+			cpump->event = event;
+		}
 	}
 }
 
 static int paicrypt_add(struct perf_event *event, int flags)
 {
-	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+	struct paicrypt_map *cpump = mp->mapptr;
 	unsigned long ccd;
 
-	if (cpump->users++ == 0) {
+	if (++cpump->active_events == 1) {
 		ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
-		WRITE_ONCE(S390_lowcore.ccd, ccd);
-		__ctl_set_bit(0, 50);
+		WRITE_ONCE(get_lowcore()->ccd, ccd);
+		local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
 	}
-	cpump->event = event;
-	if (flags & PERF_EF_START && !event->attr.sample_period) {
-		/* Only counting needs initial counter value */
+	if (flags & PERF_EF_START)
 		paicrypt_start(event, PERF_EF_RELOAD);
-	}
 	event->hw.state = 0;
-	if (event->attr.sample_period)
-		perf_sched_cb_inc(event->pmu);
 	return 0;
 }
 
+static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *);
 static void paicrypt_stop(struct perf_event *event, int flags)
 {
-	paicrypt_read(event);
+	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+	struct paicrypt_map *cpump = mp->mapptr;
+
+	if (!event->attr.sample_period) {	/* Counting */
+		paicrypt_read(event);
+	} else {				/* Sampling */
+		if (!(event->attach_state & PERF_ATTACH_TASK)) {
+			perf_sched_cb_dec(event->pmu);
+			list_del(PAI_SWLIST(event));
+		} else {
+			paicrypt_have_sample(event, cpump);
+			cpump->event = NULL;
+		}
+	}
 	event->hw.state = PERF_HES_STOPPED;
 }
 
 static void paicrypt_del(struct perf_event *event, int flags)
 {
-	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
-
-	if (event->attr.sample_period)
-		perf_sched_cb_dec(event->pmu);
-	if (!event->attr.sample_period)
-		/* Only counting needs to read counter */
-		paicrypt_stop(event, PERF_EF_UPDATE);
-	if (cpump->users-- == 1) {
-		__ctl_clear_bit(0, 50);
-		WRITE_ONCE(S390_lowcore.ccd, 0);
+	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+	struct paicrypt_map *cpump = mp->mapptr;
+
+	paicrypt_stop(event, PERF_EF_UPDATE);
+	if (--cpump->active_events == 0) {
+		local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+		WRITE_ONCE(get_lowcore()->ccd, 0);
 	}
 }
 
-/* Create raw data and save it in buffer. Returns number of bytes copied.
- * Saves only positive counter entries of the form
+/* Create raw data and save it in buffer. Calculate the delta for each
+ * counter between this invocation and the last invocation.
+ * Returns number of bytes copied.
+ * Saves only entries with positive counter difference of the form
  * 2 bytes: Number of counter
  * 8 bytes: Value of counter
  */
-static size_t paicrypt_copy(struct pai_userdata *userdata,
-			    struct paicrypt_map *cpump,
-			    bool exclude_user, bool exclude_kernel)
+static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
+			    unsigned long *page_old, bool exclude_user,
+			    bool exclude_kernel)
 {
 	int i, outidx = 0;
 
 	for (i = 1; i <= paicrypt_cnt; i++) {
-		u64 val = 0;
+		u64 val = 0, val_old = 0;
 
-		if (!exclude_kernel)
-			val += paicrypt_getctr(cpump, i, true);
-		if (!exclude_user)
-			val += paicrypt_getctr(cpump, i, false);
+		if (!exclude_kernel) {
+			val += paicrypt_getctr(page, i, true);
+			val_old += paicrypt_getctr(page_old, i, true);
+		}
+		if (!exclude_user) {
+			val += paicrypt_getctr(page, i, false);
+			val_old += paicrypt_getctr(page_old, i, false);
+		}
+		if (val >= val_old)
+			val -= val_old;
+		else
+			val = (~0ULL - val_old) + val + 1;
 		if (val) {
 			userdata[outidx].num = i;
 			userdata[outidx].value = val;
@@ -326,24 +450,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
 	return outidx * sizeof(struct pai_userdata);
 }
 
-static int paicrypt_push_sample(void)
+static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
+				struct perf_event *event)
 {
-	struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
-	struct perf_event *event = cpump->event;
 	struct perf_sample_data data;
 	struct perf_raw_record raw;
 	struct pt_regs regs;
-	size_t rawsize;
 	int overflow;
 
-	if (!cpump->event)		/* No event active */
-		return 0;
-	rawsize = paicrypt_copy(cpump->save, cpump,
-				cpump->event->attr.exclude_user,
-				cpump->event->attr.exclude_kernel);
-	if (!rawsize)			/* No incremented counters */
-		return 0;
-
 	/* Setup perf sample */
 	memset(&regs, 0, sizeof(regs));
 	memset(&raw, 0, sizeof(raw));
@@ -364,27 +478,54 @@ static int paicrypt_push_sample(void)
 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 		raw.frag.size = rawsize;
 		raw.frag.data = cpump->save;
-		raw.size = raw.frag.size;
-		data.raw = &raw;
+		perf_sample_save_raw_data(&data, event, &raw);
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
 	perf_event_update_userpage(event);
-	/* Clear lowcore page after read */
-	memset(cpump->page, 0, PAGE_SIZE);
+	/* Save crypto counter lowcore page after reading event data. */
+	memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
 	return overflow;
 }
 
+/* Check if there is data to be saved on schedule out of a task. */
+static void paicrypt_have_sample(struct perf_event *event,
+				 struct paicrypt_map *cpump)
+{
+	size_t rawsize;
+
+	if (!event)		/* No event active */
+		return;
+	rawsize = paicrypt_copy(cpump->save, cpump->page,
+				(unsigned long *)PAI_SAVE_AREA(event),
+				event->attr.exclude_user,
+				event->attr.exclude_kernel);
+	if (rawsize)			/* Incremented counters */
+		paicrypt_push_sample(rawsize, cpump, event);
+}
+
+/* On schedule out, check every event on the per CPU syswide_list. */
+static void paicrypt_have_samples(void)
+{
+	struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+	struct paicrypt_map *cpump = mp->mapptr;
+	struct perf_event *event;
+
+	list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+		paicrypt_have_sample(event, cpump);
+}
+
 /* Called on schedule-in and schedule-out. No access to event structure,
  * but for sampling only event CRYPTO_ALL is allowed.
  */
-static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
+				struct task_struct *task, bool sched_in)
 {
 	/* We started with a clean page on event installation. So read out
-	 * results on schedule_out and if page was dirty, clear values.
+	 * results on schedule_out and if page was dirty, save old values.
 	 */
 	if (!sched_in)
-		paicrypt_push_sample();
+		paicrypt_have_samples();
 }
 
 /* Attribute definitions for paicrypt interface. As with other CPU
@@ -428,7 +569,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = {
 
 /* Performance monitoring unit for mapped counters */
 static struct pmu paicrypt = {
-	.task_ctx_nr  = perf_invalid_context,
+	.task_ctx_nr  = perf_hw_context,
 	.event_init   = paicrypt_event_init,
 	.add	      = paicrypt_add,
 	.del	      = paicrypt_del,
@@ -598,6 +739,22 @@ static const char * const paicrypt_ctrnames[] = {
 	[154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
 	[155] = "IBM_RESERVED_155",
 	[156] = "IBM_RESERVED_156",
+	[157] = "KM_FULL_XTS_AES_128",
+	[158] = "KM_FULL_XTS_AES_256",
+	[159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+	[160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+	[161] = "KMAC_HMAC_SHA_224",
+	[162] = "KMAC_HMAC_SHA_256",
+	[163] = "KMAC_HMAC_SHA_384",
+	[164] = "KMAC_HMAC_SHA_512",
+	[165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+	[166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+	[167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+	[168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+	[169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+	[170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+	[171] = "PCKMO_ENCRYPT_AES_XTS_128",
+	[172] = "PCKMO_ENCRYPT_AES_XTS_256",
 };
 
 static void __init attr_event_free(struct attribute **attrs, int num)
@@ -619,6 +776,12 @@ static int __init attr_event_init_one(struct attribute **attrs, int num)
 {
 	struct perf_pmu_events_attr *pa;
 
+	/* Index larger than array_size, no counter name available */
+	if (num >= ARRAY_SIZE(paicrypt_ctrnames)) {
+		attrs[num] = NULL;
+		return 0;
+	}
+
 	pa = kzalloc(sizeof(*pa), GFP_KERNEL);
 	if (!pa)
 		return -ENOMEM;
@@ -639,14 +802,13 @@ static int __init attr_event_init(void)
 	struct attribute **attrs;
 	int ret, i;
 
-	attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
-			      GFP_KERNEL);
+	attrs = kmalloc_array(paicrypt_cnt + 2, sizeof(*attrs), GFP_KERNEL);
 	if (!attrs)
 		return -ENOMEM;
-	for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+	for (i = 0; i <= paicrypt_cnt; i++) {
 		ret = attr_event_init_one(attrs, i);
 		if (ret) {
-			attr_event_free(attrs, i - 1);
+			attr_event_free(attrs, i);
 			return ret;
 		}
 	}
@@ -667,8 +829,10 @@ static int __init paicrypt_init(void)
 	paicrypt_cnt = ib.num_cc;
 	if (paicrypt_cnt == 0)
 		return 0;
-	if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
-		paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+	if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) {
+		pr_err("Too many PMU pai_crypto counters %d\n", paicrypt_cnt);
+		return -E2BIG;
+	}
 
 	rc = attr_event_init();		/* Export known PAI crypto events */
 	if (rc) {
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 000000000000..fd14d5ebccbc
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT	"pai_ext"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+#include <asm/ctlreg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define	PAIE1_CB_SZ		0x200	/* Size of PAIE1 control block */
+#define	PAIE1_CTRBLOCK_SZ	0x400	/* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt;	/* Extracted with QPACI instruction */
+
+struct pai_userdata {		/* One raw record, see paiext_copy() */
+	u16 num;		/* Number of counter */
+	u64 value;		/* Counter increment since last snapshot */
+} __packed;
+
+/* Layout of the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb {		/* PAI extension 1 control block */
+	u64 header;		/* Not used */
+	u64 reserved1;
+	u64 acc;		/* Addr to analytics counter control block */
+	u8 reserved2[488];	/* Pads the block to 512 bytes */
+} __packed;
+
+struct paiext_map {			/* Per CPU PAI extension state */
+	unsigned long *area;		/* Area for CPU to store counters */
+	struct pai_userdata *save;	/* Raw records of changed counters */
+	unsigned int active_events;	/* # of PAI Extension users */
+	refcount_t refcnt;		/* Events referencing this map */
+	struct perf_event *event;	/* Task-attached sampling event */
+	struct paiext_cb *paiext_cb;	/* PAI extension control block area */
+	struct list_head syswide_list;	/* List system-wide sampling events */
+};
+
+struct paiext_mapptr {
+	struct paiext_map *mapptr;	/* NULL until paiext_alloc_cpu() */
+};
+
+static struct paiext_root {		/* Anchor to per CPU data */
+	refcount_t refcnt;		/* Overall active events */
+	struct paiext_mapptr __percpu *mapptr;	/* See paiext_root_alloc() */
+} paiext_root;
+
+/* Drop an anchor reference, the last one frees the per CPU pointer array. */
+static void paiext_root_free(void)
+{
+	if (refcount_dec_and_test(&paiext_root.refcnt)) {
+		free_percpu(paiext_root.mapptr);
+		paiext_root.mapptr = NULL;
+	}
+	debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__,
+			    refcount_read(&paiext_root.refcnt));
+}
+
+/* On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online. Any later event only takes another reference on the anchor.
+ */
+static int paiext_root_alloc(void)
+{
+	if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
+		/* The memory is already zeroed. */
+		paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+		if (!paiext_root.mapptr) {
+			/* Returning without refcnt adjustment is ok. The
+			 * refcnt was not raised and the caller
+			 * paiext_alloc_cpu() bails out on this error
+			 * without dropping a reference.
+			 */
+			return -ENOMEM;
+		}
+		refcount_set(&paiext_root.refcnt, 1);
+	}
+	return 0;
+}
+
+/* Protects against concurrent increment of sampler and counter member
+ * increments at the same time and prohibits concurrent execution of
+ * counting and sampling events.
+ * Ensures that analytics counter block is deallocated only when the
+ * sampling and counting on that cpu is zero.
+ * Taken in paiext_alloc_cpu() and paiext_event_destroy_cpu().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Free a CPU's paiext_map including its buffers and reset the pointer. */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+	kfree(mp->mapptr->area);
+	kfree(mp->mapptr->paiext_cb);
+	kvfree(mp->mapptr->save);
+	kfree(mp->mapptr);
+	mp->mapptr = NULL;
+}
+
+/* Drop the event's reference on a CPU's paiext_map and on the anchor. */
+static void paiext_event_destroy_cpu(struct perf_event *event, int cpu)
+{
+	struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu);
+	struct paiext_map *cpump = mp->mapptr;
+
+	mutex_lock(&paiext_reserve_mutex);
+	if (refcount_dec_and_test(&cpump->refcnt))	/* Last reference gone */
+		paiext_free(mp);
+	paiext_root_free();
+	mutex_unlock(&paiext_reserve_mutex);
+}
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+	int cpu;
+
+	free_page(PAI_SAVE_AREA(event));	/* Set for sampling events */
+	if (event->cpu == -1) {			/* See paiext_alloc() */
+		struct cpumask *mask = PAI_CPU_MASK(event);
+
+		for_each_cpu(cpu, mask)
+			paiext_event_destroy_cpu(event, cpu);
+		kfree(mask);			/* Allocated in paiext_alloc() */
+	} else {
+		paiext_event_destroy_cpu(event, event->cpu);
+	}
+	debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__,
+			    event->cpu);
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the per CPU memory for the event or take a reference on it.
+ */
+static int paiext_alloc_cpu(struct perf_event *event, int cpu)
+{
+	struct paiext_mapptr *mp;
+	struct paiext_map *cpump;
+	int rc;
+
+	mutex_lock(&paiext_reserve_mutex);
+	rc = paiext_root_alloc();
+	if (rc)
+		goto unlock;
+
+	mp = per_cpu_ptr(paiext_root.mapptr, cpu);
+	cpump = mp->mapptr;
+	if (!cpump) {			/* Paiext_map allocated? */
+		rc = -ENOMEM;
+		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+		if (!cpump)
+			goto undo;
+
+		/* Allocate memory for counter area and counter extraction.
+		 * These are
+		 * - a 512 byte block and requires 512 byte boundary alignment.
+		 * - a 1KB byte block and requires 1KB boundary alignment.
+		 * Only the first counting event has to allocate the area.
+		 *
+		 * Note: This works with commit 59bb47985c1d by default.
+		 * Backporting this to kernels without this commit might
+		 * need adjustment.
+		 */
+		mp->mapptr = cpump;
+		cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+		cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+		cpump->save = kvmalloc_array(paiext_cnt + 1,
+					     sizeof(struct pai_userdata),
+					     GFP_KERNEL);
+		if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+			paiext_free(mp);
+			goto undo;
+		}
+		INIT_LIST_HEAD(&cpump->syswide_list);
+		refcount_set(&cpump->refcnt, 1);
+		rc = 0;
+	} else {
+		refcount_inc(&cpump->refcnt);
+	}
+
+undo:
+	if (rc) {
+		/* Error in allocation of event, decrement anchor. Since
+		 * the event is not created, its destroy() function is never
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		paiext_root_free();
+	}
+unlock:
+	mutex_unlock(&paiext_reserve_mutex);
+	/* If rc is non-zero, no increment of counter/sampler was done. */
+	return rc;
+}
+
+static int paiext_alloc(struct perf_event *event)
+{
+	struct cpumask *maskptr;
+	int cpu, rc = -ENOMEM;
+
+	maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+	if (!maskptr)
+		goto out;
+
+	for_each_online_cpu(cpu) {
+		rc = paiext_alloc_cpu(event, cpu);
+		if (rc) {
+			for_each_cpu(cpu, maskptr)
+				paiext_event_destroy_cpu(event, cpu);
+			kfree(maskptr);
+			goto out;
+		}
+		cpumask_set_cpu(cpu, maskptr);
+	}
+
+	/*
+	 * On error the cpumask is freed and all references taken so far
+	 * have been dropped. On success save for which CPUs data structures
+	 * have been allocated. They are released in the
+	 * paiext_event_destroy() call back function for this event.
+	 */
+	PAI_CPU_MASK(event) = maskptr;
+	rc = 0;
+out:
+	return rc;
+}
+
+/* Check that the event number lies within the supported PAI extension 1
+ * NNPA range. On success also record the offset of the NNPA entry within
+ * the PAIE1 control block in hw.config_base.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+	u64 nr = event->attr.config;
+
+	if (nr < PAI_NNPA_BASE || nr > PAI_NNPA_BASE + paiext_cnt)
+		return -EINVAL;
+
+	/* Offset NNPA in paiext_cb */
+	event->hw.config_base = offsetof(struct paiext_cb, acc);
+	return 0;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *a = &event->attr;
+	int rc;
+
+	/* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+		return -ENOENT;
+	/* PAI extension event must be valid and in supported range */
+	rc = paiext_event_valid(event);
+	if (rc)
+		return rc;
+	/* Allow only event NNPA_ALL for sampling. */
+	if (a->sample_period && a->config != PAI_NNPA_BASE)
+		return -EINVAL;
+	/* Prohibit exclude_user event selection */
+	if (a->exclude_user)
+		return -EINVAL;
+	/* Get a page to store last counter values for sampling */
+	if (a->sample_period) {
+		PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+		if (!PAI_SAVE_AREA(event))
+			return -ENOMEM;
+	}
+
+	if (event->cpu >= 0)
+		rc = paiext_alloc_cpu(event, event->cpu);
+	else
+		rc = paiext_alloc(event);
+	if (rc) {
+		free_page(PAI_SAVE_AREA(event));
+		return rc;
+	}
+	event->destroy = paiext_event_destroy;
+
+	if (a->sample_period) {
+		a->sample_period = 1;
+		a->freq = 0;
+		/* Register for paiext_sched_task() to be called */
+		event->attach_state |= PERF_ATTACH_SCHED_CB;
+		/* Add raw data which are the memory mapped counters */
+		a->sample_type |= PERF_SAMPLE_RAW;
+		/* Turn off inheritance */
+		a->inherit = 0;
+	}
+
+	return 0;
+}
+
+static u64 paiext_getctr(unsigned long *area, int nr)
+{
+	return area[nr];	/* Slot nr holds counter nr */
+}
+
+/* Fetch the current value for an event from this CPU's counter area: a
+ * single counter, or for event NNPA_ALL the total of counters 1..paiext_cnt.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	unsigned long *ctrs = mp->mapptr->area;
+	u64 cfg = event->attr.config;
+	u64 total = 0;
+	int nr;
+
+	if (cfg != PAI_NNPA_BASE)
+		return paiext_getctr(ctrs, cfg - PAI_NNPA_BASE);
+
+	for (nr = 1; nr <= paiext_cnt; nr++)
+		total += paiext_getctr(ctrs, nr);
+
+	return total;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+	return paiext_getdata(event);	/* Thin wrapper */
+}
+
+/* Fold the increase since the previous read into the event count. */
+static void paiext_read(struct perf_event *event)
+{
+	u64 last = local64_read(&event->hw.prev_count);
+	u64 now = paiext_getall(event);
+
+	/* Remember the new value as base for the next read */
+	local64_set(&event->hw.prev_count, now);
+	local64_add(now - last, &event->count);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	u64 sum;
+
+	if (!event->attr.sample_period) {	/* Counting */
+		sum = paiext_getall(event);	/* Get current value */
+		local64_set(&event->hw.prev_count, sum);
+	} else {				/* Sampling, snapshot area */
+		memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+		       PAIE1_CTRBLOCK_SZ);
+		/* Enable context switch callback for system-wide sampling */
+		if (!(event->attach_state & PERF_ATTACH_TASK)) {
+			list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+			perf_sched_cb_inc(event->pmu);
+		} else {
+			cpump->event = event;	/* Task-attached sampling */
+		}
+	}
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	if (++cpump->active_events == 1) {	/* First user on this CPU */
+		get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb);
+		pcb->acc = virt_to_phys(cpump->area) | 0x1;
+		/* Enable CPU instruction lookup for PAIE1 control block */
+		local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
+	}
+	if (flags & PERF_EF_START)
+		paiext_start(event, PERF_EF_RELOAD);
+	event->hw.state = 0;
+	return 0;
+}
+
+static void paiext_have_sample(struct perf_event *, struct paiext_map *);
+static void paiext_stop(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+
+	if (!event->attr.sample_period) {	/* Counting, fold in delta */
+		paiext_read(event);
+	} else {				/* Sampling */
+		if (!(event->attach_state & PERF_ATTACH_TASK)) {
+			list_del(PAI_SWLIST(event));
+			perf_sched_cb_dec(event->pmu);
+		} else {
+			paiext_have_sample(event, cpump);
+			cpump->event = NULL;	/* Detach task event */
+		}
+	}
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	paiext_stop(event, PERF_EF_UPDATE);
+	if (--cpump->active_events == 0) {	/* Last user on this CPU */
+		/* Disable CPU instruction lookup for PAIE1 control block */
+		local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
+		pcb->acc = 0;
+		get_lowcore()->aicd = 0;
+	}
+}
+
+/* Store every counter which changed since the snapshot in area_old as
+ * 2 bytes: Number of counter
+ * 8 bytes: Increment of counter
+ * and return the number of bytes stored in userdata.
+ */
+static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area,
+			  unsigned long *area_old)
+{
+	struct pai_userdata *out = userdata;
+	int nr;
+
+	for (nr = 1; nr <= paiext_cnt; nr++) {
+		/* Unsigned subtraction is modulo 2^64, so the delta is
+		 * also correct after a counter wrapped around.
+		 */
+		u64 delta = paiext_getctr(area, nr) -
+			    paiext_getctr(area_old, nr);
+
+		if (!delta)
+			continue;
+		out->num = nr;
+		out->value = delta;
+		out++;
+	}
+	return (out - userdata) * sizeof(*userdata);
+}
+
+/* Write sample when one or more counters values are nonzero.
+ *
+ * Note: The function paiext_sched_task() and paiext_push_sample() are not
+ * invoked after function paiext_del() has been called because of function
+ * perf_sched_cb_dec().
+ * The function paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paiext_sched_task() as call back
+ * to run at context switch time (see paiext_start()).
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed an
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
+			      struct perf_event *event)
+{
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	int overflow;
+
+	/* Setup perf sample */
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+	memset(&data, 0, sizeof(data));
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	if (event->attr.sample_type & PERF_SAMPLE_TID) {
+		data.tid_entry.pid = task_tgid_nr(current);
+		data.tid_entry.tid = task_pid_nr(current);
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_TIME)
+		data.time = event->clock();
+	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+		data.id = event->id;
+	if (event->attr.sample_type & PERF_SAMPLE_CPU)
+		data.cpu_entry.cpu = smp_processor_id();
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.frag.size = rawsize;
+		raw.frag.data = cpump->save;
+		perf_sample_save_raw_data(&data, event, &raw);
+	}
+
+	overflow = perf_event_overflow(event, &data, &regs);
+	perf_event_update_userpage(event);
+	/* Save NNPA counter area as new snapshot after read in event */
+	memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+	       PAIE1_CTRBLOCK_SZ);
+	return overflow;
+}
+
+/* Push a sample for the event if counters changed since its snapshot. */
+static void paiext_have_sample(struct perf_event *event,
+			       struct paiext_map *cpump)
+{
+	size_t rawsize;
+
+	if (!event)
+		return;
+	rawsize = paiext_copy(cpump->save, cpump->area,
+			      (unsigned long *)PAI_SAVE_AREA(event));
+	if (rawsize)			/* Incremented counters */
+		paiext_push_sample(rawsize, cpump, event);
+}
+
+/* On schedule out, check every event on the per CPU syswide_list. */
+static void paiext_have_samples(void)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct perf_event *event;
+
+	list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+		paiext_have_sample(event, cpump);
+}
+
+/* Called on schedule-in and schedule-out. The events to sample are taken
+ * from the per CPU syswide_list, for sampling only NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
+			      struct task_struct *task, bool sched_in)
+{
+	/* The counter area was saved when the event was started. So read
+	 * out results on schedule_out and if changed, save the new values.
+	 */
+	if (!sched_in)
+		paiext_have_samples();
+}
+
+/* Attribute definitions for pai extension1 interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");	/* Event is all of config */
+
+static struct attribute *paiext_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+	.name = "events",
+	.attrs = NULL,			/* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+	.name = "format",
+	.attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+	&paiext_events_group,
+	&paiext_format_group,
+	NULL,
+};
+
+/* PMU callbacks for the PAI extension 1 (NNPA) mapped counters */
+static struct pmu paiext = {
+	.task_ctx_nr  = perf_hw_context,
+	.event_init   = paiext_event_init,
+	.add	      = paiext_add,
+	.del	      = paiext_del,
+	.start	      = paiext_start,
+	.stop	      = paiext_stop,
+	.read	      = paiext_read,
+	.sched_task   = paiext_sched_task,
+	.attr_groups  = paiext_attr_groups,
+};
+
+/* Symbolic NNPA counter names indexed by counter number, 0 is the sum. */
+static const char * const paiext_ctrnames[] = {
+	[0] = "NNPA_ALL",
+	[1] = "NNPA_ADD",
+	[2] = "NNPA_SUB",
+	[3] = "NNPA_MUL",
+	[4] = "NNPA_DIV",
+	[5] = "NNPA_MIN",
+	[6] = "NNPA_MAX",
+	[7] = "NNPA_LOG",
+	[8] = "NNPA_EXP",
+	[9] = "NNPA_IBM_RESERVED_9",
+	[10] = "NNPA_RELU",
+	[11] = "NNPA_TANH",
+	[12] = "NNPA_SIGMOID",
+	[13] = "NNPA_SOFTMAX",
+	[14] = "NNPA_BATCHNORM",
+	[15] = "NNPA_MAXPOOL2D",
+	[16] = "NNPA_AVGPOOL2D",
+	[17] = "NNPA_LSTMACT",
+	[18] = "NNPA_GRUACT",
+	[19] = "NNPA_CONVOLUTION",
+	[20] = "NNPA_MATMUL_OP",
+	[21] = "NNPA_MATMUL_OP_BCAST23",
+	[22] = "NNPA_SMALLBATCH",
+	[23] = "NNPA_LARGEDIM",
+	[24] = "NNPA_SMALLTENSOR",
+	[25] = "NNPA_1MFRAME",
+	[26] = "NNPA_2GFRAME",
+	[27] = "NNPA_ACCESSEXCEPT",
+	[28] = "NNPA_TRANSFORM",
+	[29] = "NNPA_GELU",
+	[30] = "NNPA_MOMENTS",
+	[31] = "NNPA_LAYERNORM",
+	[32] = "NNPA_MATMUL_OP_BCAST1",
+	[33] = "NNPA_SQRT",
+	[34] = "NNPA_INVSQRT",
+	[35] = "NNPA_NORM",
+	[36] = "NNPA_REDUCE",
+};
+
+/* Release the first num event attributes and the attribute array itself. */
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+	int i;
+
+	for (i = 0; i < num; i++) {
+		struct device_attribute *dap;
+
+		dap = container_of(attrs[i], struct device_attribute, attr);
+		kfree(container_of(dap, struct perf_pmu_events_attr, attr));
+	}
+	kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+
+	/* No counter name available for this index, leave the slot empty */
+	if (num >= ARRAY_SIZE(paiext_ctrnames)) {
+		attrs[num] = NULL;
+		return 0;
+	}
+
+	pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	sysfs_attr_init(&pa->attr.attr);
+	pa->id = PAI_NNPA_BASE + num;
+	pa->attr.attr.name = paiext_ctrnames[num];
+	pa->attr.attr.mode = 0444;
+	pa->attr.show = cpumf_events_sysfs_show;
+	pa->attr.store = NULL;
+	attrs[num] = &pa->attr.attr;
+	return 0;
+}
+
+/* Create PMU sysfs event attributes for counter 0 to paiext_cnt. */
+static int __init attr_event_init(void)
+{
+	struct attribute **attrs;
+	int ret, i;
+
+	attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+	for (i = 0; i <= paiext_cnt; i++) {
+		ret = attr_event_init_one(attrs, i);
+		if (ret) {
+			attr_event_free(attrs, i);	/* Slots 0..i-1 */
+			return ret;
+		}
+	}
+	attrs[i] = NULL;		/* Terminate the attribute list */
+	paiext_events_group.attrs = attrs;
+	return 0;
+}
+
+/* Detect NNPA counters via QPACI, export sysfs events and register PMU. */
+static int __init paiext_init(void)
+{
+	struct qpaci_info_block ib;
+	int rc = -ENOMEM;
+
+	if (!test_facility(197))
+		return 0;
+
+	qpaci(&ib);
+	paiext_cnt = ib.num_nnpa;
+	if (paiext_cnt >= PAI_NNPA_MAXCTR)
+		paiext_cnt = PAI_NNPA_MAXCTR;
+	if (!paiext_cnt)
+		return 0;
+
+	rc = attr_event_init();
+	if (rc) {
+		pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+		return rc;
+	}
+
+	/* Setup s390dbf facility */
+	paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+	if (!paiext_dbg) {
+		pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+		rc = -ENOMEM;
+		goto out_init;
+	}
+	debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+	rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+	if (rc) {
+		pr_err("Registration of " KMSG_COMPONENT " PMU failed with rc=%i\n", rc);
+		goto out_pmu;
+	}
+
+	return 0;
+
+out_pmu:
+	debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+	debug_unregister(paiext_dbg);
+out_init:
+	/* Only slots 0..paiext_cnt of the attribute array were set up */
+	attr_event_free(paiext_events_group.attrs, paiext_cnt + 1);
+	return rc;
+}
+
+device_initcall(paiext_init);
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 6e9e5d5e927e..a6b058ee4a36 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -5,8 +5,7 @@
 #include <linux/errno.h>
 #include <linux/bug.h>
 #include <asm/ptrace.h>
-#include <asm/fpu/api.h>
-#include <asm/fpu/types.h>
+#include <asm/fpu.h>
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
@@ -20,8 +19,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 			return 0;
 
 		idx -= PERF_REG_S390_FP0;
-		fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
-				    : current->thread.fpu.fprs[idx];
+		fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
 		return fp.ui;
 	}
 
@@ -63,6 +61,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 	 */
 	regs_user->regs = task_pt_regs(current);
 	if (user_mode(regs_user->regs))
-		save_fpu_regs();
+		save_user_fpu_regs();
 	regs_user->abi = perf_reg_abi(current);
 }
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 89949b9f3cf8..9637aee43c40 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -30,16 +30,20 @@
 #include <linux/export.h>
 #include <linux/init_task.h>
 #include <linux/entry-common.h>
+#include <linux/io.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
+#include <asm/switch_to.h>
 #include <asm/cpu_mf.h>
-#include <asm/io.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/vtimer.h>
 #include <asm/exec.h>
+#include <asm/fpu.h>
 #include <asm/irq.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include <asm/stacktrace.h>
-#include <asm/switch_to.h>
 #include <asm/runtime_instr.h>
 #include <asm/unwind.h>
 #include "entry.h"
@@ -67,10 +71,10 @@ void flush_thread(void)
 
 void arch_setup_new_exec(void)
 {
-	if (S390_lowcore.current_pid != current->pid) {
-		S390_lowcore.current_pid = current->pid;
+	if (get_lowcore()->current_pid != current->pid) {
+		get_lowcore()->current_pid = current->pid;
 		if (test_facility(40))
-			lpp(&S390_lowcore.lpp);
+			lpp(&get_lowcore()->lpp);
 	}
 }
 
@@ -82,15 +86,22 @@ void arch_release_task_struct(struct task_struct *tsk)
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
+	save_user_fpu_regs();
+
+	*dst = *src;
+	dst->thread.kfpu_flags = 0;
+
 	/*
-	 * Save the floating-point or vector register state of the current
-	 * task and set the CIF_FPU flag to lazy restore the FPU register
-	 * state when returning to user space.
+	 * Don't transfer over the runtime instrumentation or the guarded
+	 * storage control block pointers. These fields are cleared here instead
+	 * of in copy_thread() to avoid premature freeing of associated memory
+	 * on fork() failure. Wait to clear the RI flag because ->stack still
+	 * refers to the source thread.
 	 */
-	save_fpu_regs();
+	dst->thread.ri_cb = NULL;
+	dst->thread.gs_cb = NULL;
+	dst->thread.gs_bc_cb = NULL;
 
-	memcpy(dst, src, arch_task_struct_size);
-	dst->thread.fpu.regs = dst->thread.fpu.fprs;
 	return 0;
 }
 
@@ -124,21 +135,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	p->thread.last_break = 1;
 
 	frame->sf.back_chain = 0;
-	frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame);
-	frame->sf.gprs[6] = (unsigned long)p;
+	frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs;
+	frame->sf.gprs[12 - 6] = (unsigned long)p;
 	/* new return point is ret_from_fork */
-	frame->sf.gprs[8] = (unsigned long)ret_from_fork;
+	frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork;
 	/* fake return stack for resume(), don't go back to schedule */
-	frame->sf.gprs[9] = (unsigned long)frame;
+	frame->sf.gprs[15 - 6] = (unsigned long)frame;
 
 	/* Store access registers to kernel stack of new process. */
 	if (unlikely(args->fn)) {
 		/* kernel thread */
 		memset(&frame->childregs, 0, sizeof(struct pt_regs));
-		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
-				PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-		frame->childregs.psw.addr =
-				(unsigned long)__ret_from_fork;
+		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+					    PSW_MASK_EXT | PSW_MASK_MCHECK;
 		frame->childregs.gprs[9] = (unsigned long)args->fn;
 		frame->childregs.gprs[10] = (unsigned long)args->fn_arg;
 		frame->childregs.orig_gpr2 = -1;
@@ -150,13 +159,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	frame->childregs.flags = 0;
 	if (new_stackp)
 		frame->childregs.gprs[15] = new_stackp;
-
-	/* Don't copy runtime instrumentation info */
-	p->thread.ri_cb = NULL;
+	/*
+	 * Clear the runtime instrumentation flag after the above childregs
+	 * copy. The CB pointer was already cleared in arch_dup_task_struct().
+	 */
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
-	/* Don't copy guarded storage control block */
-	p->thread.gs_cb = NULL;
-	p->thread.gs_bc_cb = NULL;
 
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
@@ -178,8 +185,23 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 
 void execve_tail(void)
 {
-	current->thread.fpu.fpc = 0;
-	asm volatile("sfpc %0" : : "d" (0));
+	current->thread.ufpu.fpc = 0;
+	fpu_sfpc(0);
+}
+
+struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next)
+{
+	save_user_fpu_regs();
+	save_kernel_fpu_regs(&prev->thread);
+	save_access_regs(&prev->thread.acrs[0]);
+	save_ri_cb(prev->thread.ri_cb);
+	save_gs_cb(prev->thread.gs_cb);
+	update_cr_regs(next);
+	restore_kernel_fpu_regs(&next->thread);
+	restore_access_regs(&next->thread.acrs[0]);
+	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);
+	restore_gs_cb(next->thread.gs_cb);
+	return __switch_to_asm(prev, next);
 }
 
 unsigned long __get_wchan(struct task_struct *p)
@@ -214,13 +236,13 @@ unsigned long __get_wchan(struct task_struct *p)
 unsigned long arch_align_stack(unsigned long sp)
 {
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() & ~PAGE_MASK;
+		sp -= get_random_u32_below(PAGE_SIZE);
 	return sp & ~0xf;
 }
 
 static inline unsigned long brk_rnd(void)
 {
-	return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+	return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
 }
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index aa0e0e7fc773..11f70c1e2797 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -18,7 +18,9 @@
 #include <linux/mm_types.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
-
+#include <linux/smp.h>
+#include <asm/text-patching.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
 #include <asm/facility.h>
 #include <asm/elf.h>
@@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
 	this_cpu = smp_processor_id();
 	if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
 		__this_cpu_write(cpu_relax_retry, 0);
-		cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+		cpu = cpumask_next_wrap(this_cpu, cpumask);
 		if (cpu >= nr_cpu_ids)
 			return;
 		if (arch_vcpu_is_preempted(cpu))
@@ -80,6 +82,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
 	}
 }
 
+static void do_sync_core(void *info)
+{
+	sync_core();
+}
+
+void text_poke_sync(void)
+{
+	on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+	cpus_read_lock();
+	text_poke_sync();
+	cpus_read_unlock();
+}
+
 /*
  * cpu_init - initializes state that is per-CPU.
  */
@@ -96,15 +115,6 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, current);
 }
 
-/*
- * cpu_have_feature - Test CPU features on module initialization
- */
-int cpu_have_feature(unsigned int num)
-{
-	return elf_hwcap & (1UL << num);
-}
-EXPORT_SYMBOL(cpu_have_feature);
-
 static void show_facilities(struct seq_file *m)
 {
 	unsigned int bit;
@@ -201,21 +211,18 @@ static int __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_DFP;
 
 	/* huge page support */
-	if (MACHINE_HAS_EDAT1)
+	if (cpu_has_edat1())
 		elf_hwcap |= HWCAP_HPAGE;
 
 	/* 64-bit register support for 31-bit processes */
 	elf_hwcap |= HWCAP_HIGH_GPRS;
 
 	/* transactional execution */
-	if (MACHINE_HAS_TE)
+	if (machine_has_tx())
 		elf_hwcap |= HWCAP_TE;
 
-	/*
-	 * Vector extension can be disabled with the "novx" parameter.
-	 * Use MACHINE_HAS_VX instead of facility bit 129.
-	 */
-	if (MACHINE_HAS_VX) {
+	/* vector */
+	if (test_facility(129)) {
 		elf_hwcap |= HWCAP_VXRS;
 		if (test_facility(134))
 			elf_hwcap |= HWCAP_VXRS_BCD;
@@ -239,10 +246,10 @@ static int __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_NNPA;
 
 	/* guarded storage */
-	if (MACHINE_HAS_GS)
+	if (cpu_has_gs())
 		elf_hwcap |= HWCAP_GS;
 
-	if (MACHINE_HAS_PCI_MIO)
+	if (test_machine_feature(MFEATURE_PCI_MIO))
 		elf_hwcap |= HWCAP_PCI_MIO;
 
 	/* virtualization support */
@@ -261,31 +268,35 @@ static int __init setup_elf_platform(void)
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
 	switch (cpu_id.machine) {
 	default:	/* Use "z10" as default. */
-		strcpy(elf_platform, "z10");
+		strscpy(elf_platform, "z10");
 		break;
 	case 0x2817:
 	case 0x2818:
-		strcpy(elf_platform, "z196");
+		strscpy(elf_platform, "z196");
 		break;
 	case 0x2827:
 	case 0x2828:
-		strcpy(elf_platform, "zEC12");
+		strscpy(elf_platform, "zEC12");
 		break;
 	case 0x2964:
 	case 0x2965:
-		strcpy(elf_platform, "z13");
+		strscpy(elf_platform, "z13");
 		break;
 	case 0x3906:
 	case 0x3907:
-		strcpy(elf_platform, "z14");
+		strscpy(elf_platform, "z14");
 		break;
 	case 0x8561:
 	case 0x8562:
-		strcpy(elf_platform, "z15");
+		strscpy(elf_platform, "z15");
 		break;
 	case 0x3931:
 	case 0x3932:
-		strcpy(elf_platform, "z16");
+		strscpy(elf_platform, "z16");
+		break;
+	case 0x9175:
+	case 0x9176:
+		strscpy(elf_platform, "z17");
 		break;
 	}
 	return 0;
@@ -374,21 +385,3 @@ const struct seq_operations cpuinfo_op = {
 	.stop	= c_stop,
 	.show	= show_cpuinfo,
 };
-
-int s390_isolate_bp(void)
-{
-	if (!test_facility(82))
-		return -EOPNOTSUPP;
-	set_thread_flag(TIF_ISOLATE_BP);
-	return 0;
-}
-EXPORT_SYMBOL(s390_isolate_bp);
-
-int s390_isolate_bp_guest(void)
-{
-	if (!test_facility(82))
-		return -EOPNOTSUPP;
-	set_thread_flag(TIF_ISOLATE_BP_GUEST);
-	return 0;
-}
-EXPORT_SYMBOL(s390_isolate_bp_guest);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 53e0209229f8..e1240f6b29fa 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -7,10 +7,10 @@
  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
  */
 
-#include "asm/ptrace.h"
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/cpufeature.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/errno.h>
@@ -24,12 +24,17 @@
 #include <linux/seccomp.h>
 #include <linux/compat.h>
 #include <trace/syscall.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
 #include <asm/page.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
-#include <asm/switch_to.h>
 #include <asm/runtime_instr.h>
 #include <asm/facility.h>
+#include <asm/machine.h>
+#include <asm/ptrace.h>
+#include <asm/rwonce.h>
+#include <asm/fpu.h>
 
 #include "entry.h"
 
@@ -41,17 +46,24 @@ void update_cr_regs(struct task_struct *task)
 {
 	struct pt_regs *regs = task_pt_regs(task);
 	struct thread_struct *thread = &task->thread;
-	struct per_regs old, new;
 	union ctlreg0 cr0_old, cr0_new;
 	union ctlreg2 cr2_old, cr2_new;
 	int cr0_changed, cr2_changed;
-
-	__ctl_store(cr0_old.val, 0, 0);
-	__ctl_store(cr2_old.val, 2, 2);
+	union {
+		struct ctlreg regs[3];
+		struct {
+			struct ctlreg control;
+			struct ctlreg start;
+			struct ctlreg end;
+		};
+	} old, new;
+
+	local_ctl_store(0, &cr0_old.reg);
+	local_ctl_store(2, &cr2_old.reg);
 	cr0_new = cr0_old;
 	cr2_new = cr2_old;
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE) {
+	if (machine_has_tx()) {
 		/* Set or clear transaction execution TXC bit 8. */
 		cr0_new.tcx = 1;
 		if (task->thread.per_flags & PER_FLAG_NO_TE)
@@ -66,7 +78,7 @@ void update_cr_regs(struct task_struct *task)
 		}
 	}
 	/* Take care of enable/disable of guarded storage. */
-	if (MACHINE_HAS_GS) {
+	if (cpu_has_gs()) {
 		cr2_new.gse = 0;
 		if (task->thread.gs_cb)
 			cr2_new.gse = 1;
@@ -75,38 +87,38 @@ void update_cr_regs(struct task_struct *task)
 	cr0_changed = cr0_new.val != cr0_old.val;
 	cr2_changed = cr2_new.val != cr2_old.val;
 	if (cr0_changed)
-		__ctl_load(cr0_new.val, 0, 0);
+		local_ctl_load(0, &cr0_new.reg);
 	if (cr2_changed)
-		__ctl_load(cr2_new.val, 2, 2);
+		local_ctl_load(2, &cr2_new.reg);
 	/* Copy user specified PER registers */
-	new.control = thread->per_user.control;
-	new.start = thread->per_user.start;
-	new.end = thread->per_user.end;
+	new.control.val = thread->per_user.control;
+	new.start.val = thread->per_user.start;
+	new.end.val = thread->per_user.end;
 
 	/* merge TIF_SINGLE_STEP into user specified PER registers. */
 	if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
 	    test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
 		if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
-			new.control |= PER_EVENT_BRANCH;
+			new.control.val |= PER_EVENT_BRANCH;
 		else
-			new.control |= PER_EVENT_IFETCH;
-		new.control |= PER_CONTROL_SUSPENSION;
-		new.control |= PER_EVENT_TRANSACTION_END;
+			new.control.val |= PER_EVENT_IFETCH;
+		new.control.val |= PER_CONTROL_SUSPENSION;
+		new.control.val |= PER_EVENT_TRANSACTION_END;
 		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
-			new.control |= PER_EVENT_IFETCH;
-		new.start = 0;
-		new.end = -1UL;
+			new.control.val |= PER_EVENT_IFETCH;
+		new.start.val = 0;
+		new.end.val = -1UL;
 	}
 
 	/* Take care of the PER enablement bit in the PSW. */
-	if (!(new.control & PER_EVENT_MASK)) {
+	if (!(new.control.val & PER_EVENT_MASK)) {
 		regs->psw.mask &= ~PSW_MASK_PER;
 		return;
 	}
 	regs->psw.mask |= PSW_MASK_PER;
-	__ctl_store(old, 9, 11);
+	__local_ctl_store(9, 11, old.regs);
 	if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
-		__ctl_load(new, 9, 11);
+		__local_ctl_load(9, 11, new.regs);
 }
 
 void user_enable_single_step(struct task_struct *task)
@@ -238,22 +250,15 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fpu.fpc;
+		tmp = child->thread.ufpu.fpc;
 		tmp <<= BITS_PER_LONG - 32;
 
 	} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
 		/*
-		 * floating point regs. are either in child->thread.fpu
-		 * or the child->thread.fpu.vxrs array
+		 * floating point regs. are in the child->thread.ufpu.vxrs array
 		 */
 		offset = addr - offsetof(struct user, regs.fp_regs.fprs);
-		if (MACHINE_HAS_VX)
-			tmp = *(addr_t *)
-			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
-		else
-			tmp = *(addr_t *)
-			       ((addr_t) child->thread.fpu.fprs + offset);
-
+		tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
 	} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
 		/*
 		 * Handle access to the per_info structure.
@@ -385,24 +390,16 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		if ((unsigned int) data != 0 ||
-		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+		if ((unsigned int)data != 0)
 			return -EINVAL;
-		child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
+		child->thread.ufpu.fpc = data >> (BITS_PER_LONG - 32);
 
 	} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
 		/*
-		 * floating point regs. are either in child->thread.fpu
-		 * or the child->thread.fpu.vxrs array
+		 * floating point regs. are in the child->thread.ufpu.vxrs array
 		 */
 		offset = addr - offsetof(struct user, regs.fp_regs.fprs);
-		if (MACHINE_HAS_VX)
-			*(addr_t *)((addr_t)
-				child->thread.fpu.vxrs + 2*offset) = data;
-		else
-			*(addr_t *)((addr_t)
-				child->thread.fpu.fprs + offset) = data;
-
+		*(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data;
 	} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
 		/*
 		 * Handle access to the per_info structure.
@@ -474,22 +471,20 @@ long arch_ptrace(struct task_struct *child, long request,
 		}
 		return 0;
 	case PTRACE_GET_LAST_BREAK:
-		put_user(child->thread.last_break,
-			 (unsigned long __user *) data);
-		return 0;
+		return put_user(child->thread.last_break, (unsigned long __user *)data);
 	case PTRACE_ENABLE_TE:
-		if (!MACHINE_HAS_TE)
+		if (!machine_has_tx())
 			return -EIO;
 		child->thread.per_flags &= ~PER_FLAG_NO_TE;
 		return 0;
 	case PTRACE_DISABLE_TE:
-		if (!MACHINE_HAS_TE)
+		if (!machine_has_tx())
 			return -EIO;
 		child->thread.per_flags |= PER_FLAG_NO_TE;
 		child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
 		return 0;
 	case PTRACE_TE_ABORT_RAND:
-		if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+		if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE))
 			return -EIO;
 		switch (data) {
 		case 0UL:
@@ -617,21 +612,14 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fpu.fpc;
+		tmp = child->thread.ufpu.fpc;
 
 	} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
 		/*
-		 * floating point regs. are either in child->thread.fpu
-		 * or the child->thread.fpu.vxrs array
+		 * floating point regs. are in the child->thread.ufpu.vxrs array
 		 */
 		offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
-		if (MACHINE_HAS_VX)
-			tmp = *(__u32 *)
-			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
-		else
-			tmp = *(__u32 *)
-			       ((addr_t) child->thread.fpu.fprs + offset);
-
+		tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
 	} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
 		/*
 		 * Handle access to the per_info structure.
@@ -743,23 +731,14 @@ static int __poke_user_compat(struct task_struct *child,
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		if (test_fp_ctl(tmp))
-			return -EINVAL;
-		child->thread.fpu.fpc = data;
+		child->thread.ufpu.fpc = data;
 
 	} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
 		/*
-		 * floating point regs. are either in child->thread.fpu
-		 * or the child->thread.fpu.vxrs array
+		 * floating point regs. are in the child->thread.ufpu.vxrs array
 		 */
 		offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
-		if (MACHINE_HAS_VX)
-			*(__u32 *)((addr_t)
-				child->thread.fpu.vxrs + 2*offset) = tmp;
-		else
-			*(__u32 *)((addr_t)
-				child->thread.fpu.fprs + offset) = tmp;
-
+		*(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
 	} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
 		/*
 		 * Handle access to the per_info structure.
@@ -824,9 +803,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		}
 		return 0;
 	case PTRACE_GET_LAST_BREAK:
-		put_user(child->thread.last_break,
-			 (unsigned int __user *) data);
-		return 0;
+		return put_user(child->thread.last_break, (unsigned int __user *)data);
 	}
 	return compat_ptrace_request(child, request, addr, data);
 }
@@ -892,10 +869,10 @@ static int s390_fpregs_get(struct task_struct *target,
 	_s390_fp_regs fp_regs;
 
 	if (target == current)
-		save_fpu_regs();
+		save_user_fpu_regs();
 
-	fp_regs.fpc = target->thread.fpu.fpc;
-	fpregs_store(&fp_regs, &target->thread.fpu);
+	fp_regs.fpc = target->thread.ufpu.fpc;
+	fpregs_store(&fp_regs, &target->thread.ufpu);
 
 	return membuf_write(&to, &fp_regs, sizeof(fp_regs));
 }
@@ -909,23 +886,17 @@ static int s390_fpregs_set(struct task_struct *target,
 	freg_t fprs[__NUM_FPRS];
 
 	if (target == current)
-		save_fpu_regs();
-
-	if (MACHINE_HAS_VX)
-		convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
-	else
-		memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
-
-	/* If setting FPC, must validate it first. */
+		save_user_fpu_regs();
+	convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
 	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
-		u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
+		u32 ufpc[2] = { target->thread.ufpu.fpc, 0 };
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
 					0, offsetof(s390_fp_regs, fprs));
 		if (rc)
 			return rc;
-		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+		if (ufpc[1] != 0)
 			return -EINVAL;
-		target->thread.fpu.fpc = ufpc[0];
+		target->thread.ufpu.fpc = ufpc[0];
 	}
 
 	if (rc == 0 && count > 0)
@@ -933,12 +904,7 @@ static int s390_fpregs_set(struct task_struct *target,
 					fprs, offsetof(s390_fp_regs, fprs), -1);
 	if (rc)
 		return rc;
-
-	if (MACHINE_HAS_VX)
-		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
-	else
-		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
-
+	convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
 	return rc;
 }
 
@@ -985,12 +951,12 @@ static int s390_vxrs_low_get(struct task_struct *target,
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
 
-	if (!MACHINE_HAS_VX)
+	if (!cpu_has_vx())
 		return -ENODEV;
 	if (target == current)
-		save_fpu_regs();
+		save_user_fpu_regs();
 	for (i = 0; i < __NUM_VXRS_LOW; i++)
-		vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+		vxrs[i] = target->thread.ufpu.vxrs[i].low;
 	return membuf_write(&to, vxrs, sizeof(vxrs));
 }
 
@@ -1002,18 +968,18 @@ static int s390_vxrs_low_set(struct task_struct *target,
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i, rc;
 
-	if (!MACHINE_HAS_VX)
+	if (!cpu_has_vx())
 		return -ENODEV;
 	if (target == current)
-		save_fpu_regs();
+		save_user_fpu_regs();
 
 	for (i = 0; i < __NUM_VXRS_LOW; i++)
-		vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+		vxrs[i] = target->thread.ufpu.vxrs[i].low;
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 	if (rc == 0)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
+			target->thread.ufpu.vxrs[i].low = vxrs[i];
 
 	return rc;
 }
@@ -1022,11 +988,11 @@ static int s390_vxrs_high_get(struct task_struct *target,
 			      const struct user_regset *regset,
 			      struct membuf to)
 {
-	if (!MACHINE_HAS_VX)
+	if (!cpu_has_vx())
 		return -ENODEV;
 	if (target == current)
-		save_fpu_regs();
-	return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+		save_user_fpu_regs();
+	return membuf_write(&to, target->thread.ufpu.vxrs + __NUM_VXRS_LOW,
 			    __NUM_VXRS_HIGH * sizeof(__vector128));
 }
 
@@ -1037,13 +1003,13 @@ static int s390_vxrs_high_set(struct task_struct *target,
 {
 	int rc;
 
-	if (!MACHINE_HAS_VX)
+	if (!cpu_has_vx())
 		return -ENODEV;
 	if (target == current)
-		save_fpu_regs();
+		save_user_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
+				target->thread.ufpu.vxrs + __NUM_VXRS_LOW, 0, -1);
 	return rc;
 }
 
@@ -1070,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target,
 {
 	struct gs_cb *data = target->thread.gs_cb;
 
-	if (!MACHINE_HAS_GS)
+	if (!cpu_has_gs())
 		return -ENODEV;
 	if (!data)
 		return -ENODATA;
@@ -1087,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target,
 	struct gs_cb gs_cb = { }, *data = NULL;
 	int rc;
 
-	if (!MACHINE_HAS_GS)
+	if (!cpu_has_gs())
 		return -ENODEV;
 	if (!target->thread.gs_cb) {
 		data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1111,7 +1077,7 @@ static int s390_gs_cb_set(struct task_struct *target,
 		target->thread.gs_cb = data;
 	*target->thread.gs_cb = gs_cb;
 	if (target == current) {
-		__ctl_set_bit(2, 4);
+		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
 		restore_gs_cb(target->thread.gs_cb);
 	}
 	preempt_enable();
@@ -1124,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target,
 {
 	struct gs_cb *data = target->thread.gs_bc_cb;
 
-	if (!MACHINE_HAS_GS)
+	if (!cpu_has_gs())
 		return -ENODEV;
 	if (!data)
 		return -ENODATA;
@@ -1138,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target,
 {
 	struct gs_cb *data = target->thread.gs_bc_cb;
 
-	if (!MACHINE_HAS_GS)
+	if (!cpu_has_gs())
 		return -ENODEV;
 	if (!data) {
 		data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1558,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = {
 	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
 };
 
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
-{
-	if (offset >= NUM_GPRS)
-		return 0;
-	return regs->gprs[offset];
-}
-
 int regs_query_register_offset(const char *name)
 {
 	unsigned long offset;
@@ -1584,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset)
 		return NULL;
 	return gpr_names[offset];
 }
-
-static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
-{
-	unsigned long ksp = kernel_stack_pointer(regs);
-
-	return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
-}
-
-/**
- * regs_get_kernel_stack_nth() - get Nth entry of the stack
- * @regs:pt_regs which contains kernel stack pointer.
- * @n:stack entry number.
- *
- * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
- * this returns 0.
- */
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
-{
-	unsigned long addr;
-
-	addr = kernel_stack_pointer(regs) + n * sizeof(long);
-	if (!regs_within_kernel_stack(regs, addr))
-		return 0;
-	return *(unsigned long *)addr;
-}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 4a22163962eb..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,6 +9,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/nospec-insn.h>
 #include <asm/sigp.h>
+#include <asm/lowcore.h>
 
 	GEN_BR_THUNK %r9
 
@@ -19,21 +20,16 @@
 # r2 = Function to be called after store status
 # r3 = Parameter for function
 #
-ENTRY(store_status)
-	/* Save register one and load save area base */
-	stg	%r1,__LC_SAVE_AREA_RESTART
+SYM_CODE_START(store_status)
+	STMG_LC	%r0,%r15,__LC_GPREGS_SAVE_AREA
 	/* General purpose registers */
-	lghi	%r1,__LC_GPREGS_SAVE_AREA
-	stmg	%r0,%r15,0(%r1)
-	mvc	8(8,%r1),__LC_SAVE_AREA_RESTART
+	GET_LC	%r13
 	/* Control registers */
-	lghi	%r1,__LC_CREGS_SAVE_AREA
-	stctg	%c0,%c15,0(%r1)
+	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
 	/* Access registers */
-	lghi	%r1,__LC_AREGS_SAVE_AREA
-	stam	%a0,%a15,0(%r1)
+	stamy	%a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
 	/* Floating point registers */
-	lghi	%r1,__LC_FPREGS_SAVE_AREA
+	lay	%r1,__LC_FPREGS_SAVE_AREA(%r13)
 	std	%f0, 0x00(%r1)
 	std	%f1, 0x08(%r1)
 	std	%f2, 0x10(%r1)
@@ -51,21 +47,21 @@ ENTRY(store_status)
 	std	%f14,0x70(%r1)
 	std	%f15,0x78(%r1)
 	/* Floating point control register */
-	lghi	%r1,__LC_FP_CREG_SAVE_AREA
+	lay	%r1,__LC_FP_CREG_SAVE_AREA(%r13)
 	stfpc	0(%r1)
 	/* CPU timer */
-	lghi	%r1,__LC_CPU_TIMER_SAVE_AREA
+	lay	%r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
 	stpt	0(%r1)
 	/* Store prefix register */
-	lghi	%r1,__LC_PREFIX_SAVE_AREA
+	lay	%r1,__LC_PREFIX_SAVE_AREA(%r13)
 	stpx	0(%r1)
 	/* Clock comparator - seven bytes */
-	lghi	%r1,__LC_CLOCK_COMP_SAVE_AREA
-	larl	%r4,.Lclkcmp
+	larl	%r4,clkcmp
 	stckc	0(%r4)
+	lay	%r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
 	mvc	1(7,%r1),1(%r4)
 	/* Program status word */
-	lghi	%r1,__LC_PSW_SAVE_AREA
+	lay	%r1,__LC_PSW_SAVE_AREA(%r13)
 	epsw	%r4,%r5
 	st	%r4,0(%r1)
 	st	%r5,4(%r1)
@@ -73,9 +69,9 @@ ENTRY(store_status)
 	lgr	%r9,%r2
 	lgr	%r2,%r3
 	BR_EX	%r9
-ENDPROC(store_status)
+SYM_CODE_END(store_status)
 
 	.section .bss
-	.align	8
-.Lclkcmp:	.quad	0x0000000000000000
+	.balign	8
+SYM_DATA_LOCAL(clkcmp,	.quad 0x0000000000000000)
 	.previous
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index a9a1a6f45375..0ae297c82afd 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -26,53 +26,51 @@
  */
 
 	.text
-ENTRY(relocate_kernel)
-		basr	%r13,0		# base address
-	.base:
-		lghi	%r7,PAGE_SIZE	# load PAGE_SIZE in r7
-		lghi	%r9,PAGE_SIZE	# load PAGE_SIZE in r9
-		lg	%r5,0(%r2)	# read another word for indirection page
-		aghi	%r2,8		# increment pointer
-		tml	%r5,0x1		# is it a destination page?
-		je	.indir_check	# NO, goto "indir_check"
-		lgr	%r6,%r5		# r6 = r5
-		nill	%r6,0xf000	# mask it out and...
-		j	.base		# ...next iteration
-	.indir_check:
-		tml	%r5,0x2		# is it a indirection page?
-		je	.done_test	# NO, goto "done_test"
-		nill	%r5,0xf000	# YES, mask out,
-		lgr	%r2,%r5		# move it into the right register,
-		j	.base		# and read next...
-	.done_test:
-		tml	%r5,0x4		# is it the done indicator?
-		je	.source_test	# NO! Well, then it should be the source indicator...
-		j	.done		# ok, lets finish it here...
-	.source_test:
-		tml	%r5,0x8		# it should be a source indicator...
-		je	.base		# NO, ignore it...
-		lgr	%r8,%r5		# r8 = r5
-		nill	%r8,0xf000	# masking
-	0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
-		jo	0b
-		j	.base
-	.done:
-		lgr	%r0,%r4		# subcode
-		cghi	%r3,0
-		je	.diag
-		la	%r4,load_psw-.base(%r13)	# load psw-address into the register
-		o	%r3,4(%r4)	# or load address into psw
-		st	%r3,4(%r4)
-		mvc	0(8,%r0),0(%r4)	# copy psw to absolute address 0
-	.diag:
-		diag	%r0,%r0,0x308
-ENDPROC(relocate_kernel)
+SYM_CODE_START(relocate_kernel)
+	basr	%r13,0		# base address
+.base:
+	lghi	%r7,PAGE_SIZE	# load PAGE_SIZE in r7
+	lghi	%r9,PAGE_SIZE	# load PAGE_SIZE in r9
+	lg	%r5,0(%r2)	# read another word for indirection page
+	aghi	%r2,8		# increment pointer
+	tml	%r5,0x1		# is it a destination page?
+	je	.indir_check	# NO, goto "indir_check"
+	lgr	%r6,%r5		# r6 = r5
+	nill	%r6,0xf000	# mask it out and...
+	j	.base		# ...next iteration
+.indir_check:
+	tml	%r5,0x2		# is it a indirection page?
+	je	.done_test	# NO, goto "done_test"
+	nill	%r5,0xf000	# YES, mask out,
+	lgr	%r2,%r5		# move it into the right register,
+	j	.base		# and read next...
+.done_test:
+	tml	%r5,0x4		# is it the done indicator?
+	je	.source_test	# NO! Well, then it should be the source indicator...
+	j	.done		# ok, lets finish it here...
+.source_test:
+	tml	%r5,0x8		# it should be a source indicator...
+	je	.base		# NO, ignore it...
+	lgr	%r8,%r5		# r8 = r5
+	nill	%r8,0xf000	# masking
+0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+	jo	0b
+	j	.base
+.done:
+	lgr	%r0,%r4		# subcode
+	cghi	%r3,0
+	je	.diag
+	la	%r4,load_psw-.base(%r13)	# load psw-address into the register
+	o	%r3,4(%r4)	# or load address into psw
+	st	%r3,4(%r4)
+	mvc	0(8,%r0),0(%r4)	# copy psw to absolute address 0
+.diag:
+	diag	%r0,%r0,0x308
+SYM_CODE_END(relocate_kernel)
 
-		.align	8
-	load_psw:
-		.long	0x00080000,0x80000000
-	relocate_kernel_end:
-	.align 8
-	.globl	relocate_kernel_len
-	relocate_kernel_len:
-		.quad	relocate_kernel_end - relocate_kernel
+	.balign	8
+SYM_DATA_START_LOCAL(load_psw)
+	.long	0x00080000,0x80000000
+SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end)
+	.balign 8
+SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel)
diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c
new file mode 100644
index 000000000000..af10e6bdd34e
--- /dev/null
+++ b/arch/s390/kernel/rethook.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include "rethook.h"
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+	rh->ret_addr = regs->gprs[14];
+	rh->frame = regs->gprs[15];
+
+	/* Replace the return addr with trampoline addr */
+	regs->gprs[14] = (unsigned long)&arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+void arch_rethook_fixup_return(struct pt_regs *regs,
+			       unsigned long correct_ret_addr)
+{
+	/* Replace fake return address with real one. */
+	regs->gprs[14] = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+	return rethook_trampoline_handler(regs, regs->gprs[15]);
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/* assembler function that handles the rethook must not be probed itself */
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h
new file mode 100644
index 000000000000..32f069eed3f3
--- /dev/null
+++ b/arch/s390/kernel/rethook.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __S390_RETHOOK_H
+#define __S390_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ebad41afe355..f244c5560e7f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -52,13 +52,15 @@
 #include <linux/hugetlb.h>
 #include <linux/kmemleak.h>
 
+#include <asm/archrandom.h>
 #include <asm/boot_data.h>
+#include <asm/machine.h>
 #include <asm/ipl.h>
 #include <asm/facility.h>
 #include <asm/smp.h>
 #include <asm/mmu_context.h>
 #include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/nmi.h>
 #include <asm/irq.h>
 #include <asm/page.h>
@@ -73,7 +75,8 @@
 #include <asm/numa.h>
 #include <asm/alternative.h>
 #include <asm/nospec-branch.h>
-#include <asm/mem_detect.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
 #include <asm/uv.h>
 #include <asm/asm-offsets.h>
 #include "entry.h"
@@ -95,10 +98,10 @@ EXPORT_SYMBOL(console_irq);
  * relocated above 2 GB, because it has to use 31 bit addresses.
  * Such code and data is part of the .amode31 section.
  */
-unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
-unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
-unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
-unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
 struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
 struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
 
@@ -143,39 +146,41 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
 static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 
-int __bootdata(noexec_disabled);
-unsigned long __bootdata(ident_map_size);
-struct mem_detect_info __bootdata(mem_detect);
-struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata_preserved(max_mappable);
+struct physmem_info __bootdata(physmem_info);
 
-unsigned long __bootdata_preserved(__kaslr_offset);
-unsigned long __bootdata(__amode31_base);
+struct vm_layout __bootdata_preserved(vm_layout);
+EXPORT_SYMBOL(vm_layout);
+int __bootdata_preserved(__kaslr_enabled);
 unsigned int __bootdata_preserved(zlib_dfltcc_support);
 EXPORT_SYMBOL(zlib_dfltcc_support);
 u64 __bootdata_preserved(stfle_fac_list[16]);
 EXPORT_SYMBOL(stfle_fac_list);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
 struct oldmem_data __bootdata_preserved(oldmem_data);
 
-unsigned long VMALLOC_START;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+unsigned long __bootdata_preserved(VMALLOC_START);
 EXPORT_SYMBOL(VMALLOC_START);
 
-unsigned long VMALLOC_END;
+unsigned long __bootdata_preserved(VMALLOC_END);
 EXPORT_SYMBOL(VMALLOC_END);
 
-struct page *vmemmap;
+struct page *__bootdata_preserved(vmemmap);
 EXPORT_SYMBOL(vmemmap);
-unsigned long vmemmap_size;
+unsigned long __bootdata_preserved(vmemmap_size);
 
-unsigned long MODULES_VADDR;
-unsigned long MODULES_END;
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
 
 /* An array with a pointer to the lowcore of every CPU. */
 struct lowcore *lowcore_ptr[NR_CPUS];
 EXPORT_SYMBOL(lowcore_ptr);
 
-DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
-
 /*
  * The Write Back bit position in the physaddr is given by the SLPC PCI.
  * Leaving the mask zero always uses write through which is safe
@@ -245,7 +250,7 @@ static void __init conmode_default(void)
 	char query_buffer[1024];
 	char *ptr;
 
-        if (MACHINE_IS_VM) {
+	if (machine_is_vm()) {
 		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
 		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
 		ptr = strstr(query_buffer, "SUBCHANNEL =");
@@ -283,7 +288,7 @@ static void __init conmode_default(void)
 			SET_CONSOLE_SCLP;
 #endif
 		}
-	} else if (MACHINE_IS_KVM) {
+	} else if (machine_is_kvm()) {
 		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
 			SET_CONSOLE_VT220;
 		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
@@ -304,7 +309,7 @@ static void __init setup_zfcpdump(void)
 		return;
 	if (oldmem_data.start)
 		return;
-	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+	strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
 	console_loglevel = 2;
 }
 #else
@@ -359,63 +364,30 @@ void *restart_stack;
 
 unsigned long stack_alloc(void)
 {
-#ifdef CONFIG_VMAP_STACK
-	void *ret;
+	void *stack;
 
-	ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
-			     NUMA_NO_NODE, __builtin_return_address(0));
-	kmemleak_not_leak(ret);
-	return (unsigned long)ret;
-#else
-	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-#endif
+	stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+			       NUMA_NO_NODE, __builtin_return_address(0));
+	kmemleak_not_leak(stack);
+	return (unsigned long)stack;
 }
 
 void stack_free(unsigned long stack)
 {
-#ifdef CONFIG_VMAP_STACK
-	vfree((void *) stack);
-#else
-	free_pages(stack, THREAD_SIZE_ORDER);
-#endif
+	vfree((void *)stack);
 }
 
-int __init arch_early_irq_init(void)
+static unsigned long __init stack_alloc_early(void)
 {
 	unsigned long stack;
 
-	stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-	if (!stack)
-		panic("Couldn't allocate async stack");
-	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
-	return 0;
+	stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
+	return stack;
 }
 
-void __init arch_call_rest_init(void)
+static void __init setup_lowcore(void)
 {
-	unsigned long stack;
-
-	stack = stack_alloc();
-	if (!stack)
-		panic("Couldn't allocate kernel stack");
-	current->stack = (void *) stack;
-#ifdef CONFIG_VMAP_STACK
-	current->stack_vm_area = (void *) stack;
-#endif
-	set_task_stack_end_magic(current);
-	stack += STACK_INIT_OFFSET;
-	S390_lowcore.kernel_stack = stack;
-	call_on_stack_noreturn(rest_init, stack);
-}
-
-static void __init setup_lowcore_dat_off(void)
-{
-	unsigned long int_psw_mask = PSW_KERNEL_BITS;
-	unsigned long mcck_stack;
-	struct lowcore *lc;
-
-	if (IS_ENABLED(CONFIG_KASAN))
-		int_psw_mask |= PSW_MASK_DAT;
+	struct lowcore *lc, *abs_lc;
 
 	/*
 	 * Setup lowcore for boot cpu
@@ -426,44 +398,40 @@ static void __init setup_lowcore_dat_off(void)
 		panic("%s: Failed to allocate %zu bytes align=%zx\n",
 		      __func__, sizeof(*lc), sizeof(*lc));
 
-	lc->restart_psw.mask = PSW_KERNEL_BITS;
-	lc->restart_psw.addr = (unsigned long) restart_int_handler;
-	lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+	lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
+	lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+	lc->restart_psw.addr = __pa(restart_int_handler);
+	lc->external_new_psw.mask = PSW_KERNEL_BITS;
 	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
-	lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+	lc->svc_new_psw.mask = PSW_KERNEL_BITS;
 	lc->svc_new_psw.addr = (unsigned long) system_call;
-	lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+	lc->program_new_psw.mask = PSW_KERNEL_BITS;
 	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
 	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
 	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
-	lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+	lc->io_new_psw.mask = PSW_KERNEL_BITS;
 	lc->io_new_psw.addr = (unsigned long) io_int_handler;
 	lc->clock_comparator = clock_comparator_max;
-	lc->nodat_stack = ((unsigned long) &init_thread_union)
-		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->current_task = (unsigned long)&init_task;
 	lc->lpp = LPP_MAGIC;
-	lc->machine_flags = S390_lowcore.machine_flags;
-	lc->preempt_count = S390_lowcore.preempt_count;
+	lc->preempt_count = get_lowcore()->preempt_count;
 	nmi_alloc_mcesa_early(&lc->mcesad);
-	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
-	lc->exit_timer = S390_lowcore.exit_timer;
-	lc->user_timer = S390_lowcore.user_timer;
-	lc->system_timer = S390_lowcore.system_timer;
-	lc->steal_timer = S390_lowcore.steal_timer;
-	lc->last_update_timer = S390_lowcore.last_update_timer;
-	lc->last_update_clock = S390_lowcore.last_update_clock;
-
+	lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
+	lc->exit_timer = get_lowcore()->exit_timer;
+	lc->user_timer = get_lowcore()->user_timer;
+	lc->system_timer = get_lowcore()->system_timer;
+	lc->steal_timer = get_lowcore()->steal_timer;
+	lc->last_update_timer = get_lowcore()->last_update_timer;
+	lc->last_update_clock = get_lowcore()->last_update_clock;
 	/*
 	 * Allocate the global restart stack which is the same for
-	 * all CPUs in cast *one* of them does a PSW restart.
+	 * all CPUs in case *one* of them does a PSW restart.
 	 */
-	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-	if (!restart_stack)
-		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-		      __func__, THREAD_SIZE, THREAD_SIZE);
-	restart_stack += STACK_INIT_OFFSET;
-
+	restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
+	lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+	lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+	lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+	lc->kernel_stack = get_lowcore()->kernel_stack;
 	/*
 	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
 	 * restart data to the absolute zero lowcore. This is necessary if
@@ -473,47 +441,31 @@ static void __init setup_lowcore_dat_off(void)
 	lc->restart_fn = (unsigned long) do_restart;
 	lc->restart_data = 0;
 	lc->restart_source = -1U;
-
-	mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-	if (!mcck_stack)
-		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-		      __func__, THREAD_SIZE, THREAD_SIZE);
-	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
-
-	/* Setup absolute zero lowcore */
-	put_abs_lowcore(restart_stack, lc->restart_stack);
-	put_abs_lowcore(restart_fn, lc->restart_fn);
-	put_abs_lowcore(restart_data, lc->restart_data);
-	put_abs_lowcore(restart_source, lc->restart_source);
-	put_abs_lowcore(restart_psw, lc->restart_psw);
-
 	lc->spinlock_lockval = arch_spin_lockval(0);
 	lc->spinlock_index = 0;
 	arch_spin_lock_setup(0);
 	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
 	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
 	lc->preempt_count = PREEMPT_DISABLED;
+	lc->kernel_asce = get_lowcore()->kernel_asce;
+	lc->user_asce = get_lowcore()->user_asce;
+
+	system_ctlreg_init_save_area(lc);
+	abs_lc = get_abs_lowcore();
+	abs_lc->restart_stack = lc->restart_stack;
+	abs_lc->restart_fn = lc->restart_fn;
+	abs_lc->restart_data = lc->restart_data;
+	abs_lc->restart_source = lc->restart_source;
+	abs_lc->restart_psw = lc->restart_psw;
+	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+	abs_lc->program_new_psw = lc->program_new_psw;
+	abs_lc->mcesad = lc->mcesad;
+	put_abs_lowcore(abs_lc);
 
 	set_prefix(__pa(lc));
 	lowcore_ptr[0] = lc;
-}
-
-static void __init setup_lowcore_dat_on(void)
-{
-	struct lowcore *lc = lowcore_ptr[0];
-	int cr;
-
-	__ctl_clear_bit(0, 28);
-	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
-	S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
-	S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
-	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
-	__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
-	__ctl_set_bit(0, 28);
-	put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
-	put_abs_lowcore(program_new_psw, lc->program_new_psw);
-	for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
-		put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+	if (abs_lowcore_map(0, lowcore_ptr[0], false))
+		panic("Couldn't setup absolute lowcore");
 }
 
 static struct resource code_resource = {
@@ -544,25 +496,22 @@ static void __init setup_resources(void)
 	int j;
 	u64 i;
 
-	code_resource.start = (unsigned long) _text;
-	code_resource.end = (unsigned long) _etext - 1;
-	data_resource.start = (unsigned long) _etext;
-	data_resource.end = (unsigned long) _edata - 1;
-	bss_resource.start = (unsigned long) __bss_start;
-	bss_resource.end = (unsigned long) __bss_stop - 1;
+	code_resource.start = __pa_symbol(_text);
+	code_resource.end = __pa_symbol(_etext) - 1;
+	data_resource.start = __pa_symbol(_etext);
+	data_resource.end = __pa_symbol(_edata) - 1;
+	bss_resource.start = __pa_symbol(__bss_start);
+	bss_resource.end = __pa_symbol(__bss_stop) - 1;
 
 	for_each_mem_range(i, &start, &end) {
-		res = memblock_alloc(sizeof(*res), 8);
-		if (!res)
-			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-			      __func__, sizeof(*res), 8);
+		res = memblock_alloc_or_panic(sizeof(*res), 8);
 		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		res->name = "System RAM";
 		res->start = start;
 		/*
 		 * In memblock, end points to the first byte after the
-		 * range while in resourses, end points to the last byte in
+		 * range while in resources, end points to the last byte in
 		 * the range.
 		 */
 		res->end = end - 1;
@@ -574,10 +523,7 @@ static void __init setup_resources(void)
 			    std_res->start > res->end)
 				continue;
 			if (std_res->end > res->end) {
-				sub_res = memblock_alloc(sizeof(*sub_res), 8);
-				if (!sub_res)
-					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-					      __func__, sizeof(*sub_res), 8);
+				sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
 				*sub_res = *std_res;
 				sub_res->end = res->end;
 				std_res->start = res->end + 1;
@@ -606,7 +552,6 @@ static void __init setup_resources(void)
 
 static void __init setup_memory_end(void)
 {
-	memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size);
 	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
 	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
 }
@@ -638,6 +583,18 @@ static struct notifier_block kdump_mem_nb = {
 #endif
 
 /*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+	unsigned long start, end;
+	struct reserved_range *range;
+
+	for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
+		memblock_reserve(start, end - start);
+}
+
+/*
  * Reserve memory for kdump kernel to be loaded with kexec
  */
 static void __init reserve_crashkernel(void)
@@ -647,8 +604,8 @@ static void __init reserve_crashkernel(void)
 	phys_addr_t low, high;
 	int rc;
 
-	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
-			       &crash_base);
+	rc = parse_crashkernel(boot_command_line, ident_map_size,
+			       &crash_size, &crash_base, NULL, NULL);
 
 	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
 	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -693,7 +650,7 @@ static void __init reserve_crashkernel(void)
 		return;
 	}
 
-	if (!oldmem_data.start && MACHINE_IS_VM)
+	if (!oldmem_data.start && machine_is_vm())
 		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
@@ -711,13 +668,13 @@ static void __init reserve_crashkernel(void)
  */
 static void __init reserve_initrd(void)
 {
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (!initrd_data.start || !initrd_data.size)
+	unsigned long addr, size;
+
+	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
 		return;
-	initrd_start = (unsigned long)__va(initrd_data.start);
-	initrd_end = initrd_start + initrd_data.size;
-	memblock_reserve(initrd_data.start, initrd_data.size);
-#endif
+	initrd_start = (unsigned long)__va(addr);
+	initrd_end = initrd_start + size;
+	memblock_reserve(addr, size);
 }
 
 /*
@@ -729,80 +686,64 @@ static void __init reserve_certificate_list(void)
 		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
 }
 
-static void __init reserve_mem_detect_info(void)
+static void __init reserve_physmem_info(void)
 {
-	unsigned long start, size;
+	unsigned long addr, size;
 
-	get_mem_detect_reserved(&start, &size);
-	if (size)
-		memblock_reserve(start, size);
+	if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
+		memblock_reserve(addr, size);
 }
 
-static void __init free_mem_detect_info(void)
+static void __init free_physmem_info(void)
 {
-	unsigned long start, size;
+	unsigned long addr, size;
 
-	get_mem_detect_reserved(&start, &size);
-	if (size)
-		memblock_phys_free(start, size);
+	if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
+		memblock_phys_free(addr, size);
 }
 
-static const char * __init get_mem_info_source(void)
-{
-	switch (mem_detect.info_source) {
-	case MEM_DETECT_SCLP_STOR_INFO:
-		return "sclp storage info";
-	case MEM_DETECT_DIAG260:
-		return "diag260";
-	case MEM_DETECT_SCLP_READ_INFO:
-		return "sclp read info";
-	case MEM_DETECT_BIN_SEARCH:
-		return "binary search";
-	}
-	return "none";
-}
-
-static void __init memblock_add_mem_detect_info(void)
+static void __init memblock_add_physmem_info(void)
 {
 	unsigned long start, end;
 	int i;
 
 	pr_debug("physmem info source: %s (%hhd)\n",
-		 get_mem_info_source(), mem_detect.info_source);
+		 get_physmem_info_source(), physmem_info.info_source);
 	/* keep memblock lists close to the kernel */
 	memblock_set_bottom_up(true);
-	for_each_mem_detect_block(i, &start, &end) {
+	for_each_physmem_usable_range(i, &start, &end)
 		memblock_add(start, end - start);
+	for_each_physmem_online_range(i, &start, &end)
 		memblock_physmem_add(start, end - start);
-	}
 	memblock_set_bottom_up(false);
 	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
 }
 
 /*
- * Check for initrd being in usable memory
+ * Reserve the part of the lowcore that lies within the identity mapping.
  */
-static void __init check_initrd(void)
+static void __init reserve_lowcore(void)
 {
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (initrd_data.start && initrd_data.size &&
-	    !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
-		pr_err("The initial RAM disk does not fit into the memory\n");
-		memblock_phys_free(initrd_data.start, initrd_data.size);
-		initrd_start = initrd_end = 0;
+	void *lowcore_start = get_lowcore();
+	void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+	void *start, *end;
+
+	if (absolute_pointer(__identity_base) < lowcore_end) {
+		start = max(lowcore_start, (void *)__identity_base);
+		end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+		memblock_reserve(__pa(start), end - start);
 	}
-#endif
 }
 
 /*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Reserve memory used for absolute lowcore/command line/kernel image.
  */
 static void __init reserve_kernel(void)
 {
 	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
 	memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
 	memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
-	memblock_reserve(__amode31_base, __eamode31 - __samode31);
+	memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
 	memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
 	memblock_reserve(__pa(_stext), _end - _stext);
 }
@@ -824,15 +765,15 @@ static void __init setup_memory(void)
 static void __init relocate_amode31_section(void)
 {
 	unsigned long amode31_size = __eamode31 - __samode31;
-	long amode31_offset = __amode31_base - __samode31;
-	long *ptr;
+	long amode31_offset, *ptr;
 
+	amode31_offset = AMODE31_START - (unsigned long)__samode31;
 	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
 
 	/* Move original AMODE31 section to the new one */
-	memmove((void *)__amode31_base, (void *)__samode31, amode31_size);
+	memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
 	/* Zero out the old AMODE31 section to catch invalid accesses within it */
-	memset((void *)__samode31, 0, amode31_size);
+	memset(__samode31, 0, amode31_size);
 
 	/* Update all AMODE31 region references */
 	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
@@ -851,15 +792,15 @@ static void __init setup_cr(void)
 	__ctl_duct[4] = (unsigned long)__ctl_duald;
 
 	/* Update control registers CR2, CR5 and CR15 */
-	__ctl_store(cr2.val, 2, 2);
-	__ctl_store(cr5.val, 5, 5);
-	__ctl_store(cr15.val, 15, 15);
+	local_ctl_store(2, &cr2.reg);
+	local_ctl_store(5, &cr5.reg);
+	local_ctl_store(15, &cr15.reg);
 	cr2.ducto = (unsigned long)__ctl_duct >> 6;
 	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
 	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
-	__ctl_load(cr2.val, 2, 2);
-	__ctl_load(cr5.val, 5, 5);
-	__ctl_load(cr15.val, 15, 15);
+	system_ctl_load(2, &cr2.reg);
+	system_ctl_load(5, &cr5.reg);
+	system_ctl_load(15, &cr15.reg);
 }
 
 /*
@@ -869,9 +810,7 @@ static void __init setup_randomness(void)
 {
 	struct sysinfo_3_2_2 *vmms;
 
-	vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	if (!vmms)
-		panic("Failed to allocate memory for sysinfo structure\n");
+	vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
 	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
 		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
 	memblock_free(vmms, PAGE_SIZE);
@@ -881,22 +820,6 @@ static void __init setup_randomness(void)
 }
 
 /*
- * Find the correct size for the task_struct. This depends on
- * the size of the struct fpu at the end of the thread_struct
- * which is embedded in the task_struct.
- */
-static void __init setup_task_size(void)
-{
-	int task_size = sizeof(struct task_struct);
-
-	if (!MACHINE_HAS_VX) {
-		task_size -= sizeof(__vector128) * __NUM_VXRS;
-		task_size += sizeof(freg_t) * __NUM_FPRS;
-	}
-	arch_task_struct_size = task_size;
-}
-
-/*
  * Issue diagnose 318 to set the control program name and
  * version codes.
  */
@@ -928,7 +851,7 @@ static void __init log_component_list(void)
 		pr_info("Linux is running with Secure-IPL enabled\n");
 	else
 		pr_info("Linux is running with Secure-IPL disabled\n");
-	ptr = (void *) early_ipl_comp_list_addr;
+	ptr = __va(early_ipl_comp_list_addr);
 	end = (void *) ptr + early_ipl_comp_list_size;
 	pr_info("The IPL report contains the following components:\n");
 	while (ptr < end) {
@@ -947,6 +870,23 @@ static void __init log_component_list(void)
 }
 
 /*
+ * Print avoiding interpretation of % in buf and taking bootdebug option
+ * into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+	char fmt[] = KERN_SOH "0boot: %s";
+	int level = printk_get_level(buf);
+
+	buf = skip_timestamp(printk_skip_level(buf));
+	if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+		return;
+
+	fmt[1] = level;
+	printk(fmt, buf);
+}
+
+/*
  * Setup function called from init/main.c just after the banner
  * was printed.
  */
@@ -956,15 +896,21 @@ void __init setup_arch(char **cmdline_p)
         /*
          * print what head.S has found out about the machine
          */
-	if (MACHINE_IS_VM)
+	if (machine_is_vm())
 		pr_info("Linux is running as a z/VM "
 			"guest operating system in 64-bit mode\n");
-	else if (MACHINE_IS_KVM)
+	else if (machine_is_kvm())
 		pr_info("Linux is running under KVM in 64-bit mode\n");
-	else if (MACHINE_IS_LPAR)
+	else if (machine_is_lpar())
 		pr_info("Linux is running natively in 64-bit mode\n");
 	else
 		pr_info("Linux is running as a guest in 64-bit mode\n");
+	/* Print decompressor messages if not already printed */
+	if (!boot_earlyprintk)
+		boot_rb_foreach(print_rb_entry);
+
+	if (machine_has_relocated_lowcore())
+		pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
 
 	log_component_list();
 
@@ -988,21 +934,22 @@ void __init setup_arch(char **cmdline_p)
 
 	os_info_init();
 	setup_ipl();
-	setup_task_size();
 	setup_control_program_code();
 
 	/* Do some memory reservations *before* memory is added to memblock */
+	reserve_pgtables();
+	reserve_lowcore();
 	reserve_kernel();
 	reserve_initrd();
 	reserve_certificate_list();
-	reserve_mem_detect_info();
+	reserve_physmem_info();
 	memblock_set_current_limit(ident_map_size);
 	memblock_allow_resize();
 
 	/* Get information about *all* installed memory */
-	memblock_add_mem_detect_info();
+	memblock_add_physmem_info();
 
-	free_mem_detect_info();
+	free_physmem_info();
 	setup_memory_end();
 	memblock_dump_all();
 	setup_memory();
@@ -1012,33 +959,29 @@ void __init setup_arch(char **cmdline_p)
 	setup_uv();
 	dma_contiguous_reserve(ident_map_size);
 	vmcp_cma_reserve();
-	if (MACHINE_HAS_EDAT2)
+	if (cpu_has_edat2())
 		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 
-	check_initrd();
 	reserve_crashkernel();
 #ifdef CONFIG_CRASH_DUMP
 	/*
-	 * Be aware that smp_save_dump_cpus() triggers a system reset.
+	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
 	 * Therefore CPU and device initialization should be done afterwards.
 	 */
-	smp_save_dump_cpus();
+	smp_save_dump_secondary_cpus();
 #endif
 
 	setup_resources();
-	setup_lowcore_dat_off();
+	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_detect_mhz_feature();
         cpu_init();
 	numa_setup();
 	smp_detect_cpus();
 	topology_init_early();
-
-	if (test_facility(193))
-		static_branch_enable(&cpu_has_bear);
-
+	setup_protection_map();
 	/*
-	 * Create kernel page tables and switch to virtual addressing.
+	 * Create kernel page tables.
 	 */
         paging_init();
 
@@ -1046,7 +989,9 @@ void __init setup_arch(char **cmdline_p)
 	 * After paging_init created the kernel page table, the new PSWs
 	 * in lowcore can now run with DAT enabled.
 	 */
-	setup_lowcore_dat_on();
+#ifdef CONFIG_CRASH_DUMP
+	smp_save_dump_ipl_cpu();
+#endif
 
         /* Setup default console */
 	conmode_default();
@@ -1062,3 +1007,8 @@ void __init setup_arch(char **cmdline_p)
 	/* Add system specific data to the random pool */
 	setup_randomness();
 }
+
+void __init arch_cpu_finalize_init(void)
+{
+	sclp_init();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 38258f817048..e48013cd832c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -12,6 +12,7 @@
 
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/rseq.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/kernel.h>
@@ -29,9 +30,9 @@
 #include <linux/compat.h>
 #include <asm/ucontext.h>
 #include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
+#include <asm/access-regs.h>
 #include <asm/lowcore.h>
-#include <asm/switch_to.h>
-#include <asm/vdso.h>
 #include "entry.h"
 
 /*
@@ -108,7 +109,7 @@ struct rt_sigframe
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fpu_regs();
+	save_user_fpu_regs();
 }
 
 /* Load registers after signal return */
@@ -130,7 +131,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
+	fpregs_store(&user_sregs.fpregs, &current->thread.ufpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
 		return -EFAULT;
 	return 0;
@@ -149,10 +150,6 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
 		return -EINVAL;
 
-	/* Test the floating-point-control word. */
-	if (test_fp_ctl(user_sregs.fpregs.fpc))
-		return -EINVAL;
-
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
 	regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
 		(user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
@@ -168,7 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
 
-	fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
+	fpregs_load(&user_sregs.fpregs, &current->thread.ufpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -182,13 +179,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+			vxrs[i] = current->thread.ufpu.vxrs[i].low;
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+				   current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -202,15 +199,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+			current->thread.ufpu.vxrs[i].low = vxrs[i];
 	}
 	return 0;
 }
@@ -225,7 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
-	save_fpu_regs();
+	save_user_fpu_regs();
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -249,7 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
-	save_fpu_regs();
+	save_user_fpu_regs();
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -300,7 +297,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	 * included in the signal frame on a 31-bit system.
 	 */
 	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
-	if (MACHINE_HAS_VX)
+	if (cpu_has_vx())
 		frame_size += sizeof(frame->sregs_ext);
 	frame = get_sigframe(ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
@@ -377,7 +374,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	 * included in the signal frame on a 31-bit system.
 	 */
 	uc_flags = 0;
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		frame_size += sizeof(_sigregs_ext);
 		uc_flags |= UC_VXRS;
 	}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 30c91d565933..81f12bb77f62 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -18,6 +18,7 @@
 #define KMSG_COMPONENT "cpu"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/cpufeature.h>
 #include <linux/workqueue.h>
 #include <linux/memblock.h>
 #include <linux/export.h>
@@ -36,16 +37,20 @@
 #include <linux/sched/task_stack.h>
 #include <linux/crash_dump.h>
 #include <linux/kprobes.h>
+#include <asm/access-regs.h>
 #include <asm/asm-offsets.h>
+#include <asm/machine.h>
+#include <asm/ctlreg.h>
+#include <asm/pfault.h>
 #include <asm/diag.h>
-#include <asm/switch_to.h>
 #include <asm/facility.h>
+#include <asm/fpu.h>
 #include <asm/ipl.h>
 #include <asm/setup.h>
 #include <asm/irq.h>
 #include <asm/tlbflush.h>
 #include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/sclp.h>
 #include <asm/debug.h>
 #include <asm/os_info.h>
@@ -55,6 +60,7 @@
 #include <asm/stacktrace.h>
 #include <asm/topology.h>
 #include <asm/vdso.h>
+#include <asm/maccess.h>
 #include "entry.h"
 
 enum {
@@ -70,18 +76,15 @@ enum {
 	CPU_STATE_CONFIGURED,
 };
 
-static DEFINE_PER_CPU(struct cpu *, cpu_device);
-
-struct pcpu {
-	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
-	unsigned long ec_clk;		/* sigp timestamp for ec_xxx */
-	signed char state;		/* physical cpu state */
-	signed char polarization;	/* physical polarization */
-	u16 address;			/* physical cpu address */
-};
-
 static u8 boot_core_type;
-static struct pcpu pcpu_devices[NR_CPUS];
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. For that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
 
 unsigned int smp_cpu_mt_shift;
 EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -96,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
 static unsigned int smp_max_threads __initdata = -1U;
 cpumask_t cpu_setup_mask;
 
-static int __init early_nosmt(char *s)
-{
-	smp_max_threads = 1;
-	return 0;
-}
-early_param("nosmt", early_nosmt);
-
 static int __init early_smt(char *s)
 {
 	get_option(&s, &smp_max_threads);
@@ -112,7 +108,7 @@ early_param("smt", early_smt);
 
 /*
  * The smp_cpu_state_mutex must be held when changing the state or polarization
- * member of a pcpu data structure within the pcpu_devices arreay.
+ * member of a pcpu data structure within the pcpu_devices array.
  */
 DEFINE_MUTEX(smp_cpu_state_mutex);
 
@@ -172,8 +168,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
 	int cpu;
 
 	for_each_cpu(cpu, mask)
-		if (pcpu_devices[cpu].address == address)
-			return pcpu_devices + cpu;
+		if (per_cpu(pcpu_devices, cpu).address == address)
+			return &per_cpu(pcpu_devices, cpu);
 	return NULL;
 }
 
@@ -199,7 +195,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	mcck_stack = stack_alloc();
 	if (!lc || !nodat_stack || !async_stack || !mcck_stack)
 		goto out;
-	memcpy(lc, &S390_lowcore, 512);
+	memcpy(lc, get_lowcore(), 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
 	lc->async_stack = async_stack + STACK_INIT_OFFSET;
 	lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
@@ -212,10 +208,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	lc->preempt_count = PREEMPT_DISABLED;
 	if (nmi_alloc_mcesa(&lc->mcesad))
 		goto out;
+	if (abs_lowcore_map(cpu, lc, true))
+		goto out_mcesa;
 	lowcore_ptr[cpu] = lc;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
 	return 0;
 
+out_mcesa:
+	nmi_free_mcesa(&lc->mcesad);
 out:
 	stack_free(mcck_stack);
 	stack_free(async_stack);
@@ -224,19 +224,18 @@ out:
 	return -ENOMEM;
 }
 
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
 {
 	unsigned long async_stack, nodat_stack, mcck_stack;
 	struct lowcore *lc;
-	int cpu;
 
-	cpu = pcpu - pcpu_devices;
 	lc = lowcore_ptr[cpu];
 	nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
 	async_stack = lc->async_stack - STACK_INIT_OFFSET;
 	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[cpu] = NULL;
+	abs_lowcore_unmap(cpu);
 	nmi_free_mcesa(&lc->mcesad);
 	stack_free(async_stack);
 	stack_free(mcck_stack);
@@ -246,37 +245,37 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 
 static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 {
-	struct lowcore *lc = lowcore_ptr[cpu];
+	struct lowcore *lc, *abs_lc;
 
+	lc = lowcore_ptr[cpu];
 	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
 	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
 	lc->cpu_nr = cpu;
+	lc->pcpu = (unsigned long)pcpu;
 	lc->restart_flags = RESTART_FLAG_CTLREGS;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 	lc->spinlock_index = 0;
 	lc->percpu_offset = __per_cpu_offset[cpu];
-	lc->kernel_asce = S390_lowcore.kernel_asce;
+	lc->kernel_asce = get_lowcore()->kernel_asce;
 	lc->user_asce = s390_invalid_asce;
-	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->user_timer = lc->system_timer =
 		lc->steal_timer = lc->avg_steal_timer = 0;
-	__ctl_store(lc->cregs_save_area, 0, 15);
-	lc->cregs_save_area[1] = lc->kernel_asce;
+	abs_lc = get_abs_lowcore();
+	memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
+	put_abs_lowcore(abs_lc);
+	lc->cregs_save_area[1] = lc->user_asce;
 	lc->cregs_save_area[7] = lc->user_asce;
 	save_access_regs((unsigned int *) lc->access_regs_save_area);
 	arch_spin_lock_setup(cpu);
 }
 
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
 {
 	struct lowcore *lc;
-	int cpu;
 
-	cpu = pcpu - pcpu_devices;
 	lc = lowcore_ptr[cpu];
-	lc->kernel_stack = (unsigned long) task_stack_page(tsk)
-		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
-	lc->current_task = (unsigned long) tsk;
+	lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
+	lc->current_task = (unsigned long)tsk;
 	lc->lpp = LPP_MAGIC;
 	lc->current_pid = tsk->pid;
 	lc->user_timer = tsk->thread.user_timer;
@@ -287,18 +286,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
 	lc->steal_timer = 0;
 }
 
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
 {
 	struct lowcore *lc;
-	int cpu;
 
-	cpu = pcpu - pcpu_devices;
 	lc = lowcore_ptr[cpu];
 	lc->restart_stack = lc->kernel_stack;
 	lc->restart_fn = (unsigned long) func;
 	lc->restart_data = (unsigned long) data;
 	lc->restart_source = -1U;
-	pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+	pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
 }
 
 typedef void (pcpu_delegate_fn)(void *);
@@ -311,20 +308,23 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
 	func(data);	/* should not return */
 }
 
-static void pcpu_delegate(struct pcpu *pcpu,
+static void pcpu_delegate(struct pcpu *pcpu, int cpu,
 			  pcpu_delegate_fn *func,
 			  void *data, unsigned long stack)
 {
-	struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
-	unsigned int source_cpu = stap();
+	struct lowcore *lc, *abs_lc;
+	unsigned int source_cpu;
+
+	lc = lowcore_ptr[cpu];
+	source_cpu = stap();
 
-	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
 	if (pcpu->address == source_cpu) {
 		call_on_stack(2, stack, void, __pcpu_delegate,
 			      pcpu_delegate_fn *, func, void *, data);
 	}
 	/* Stop target cpu (if func returns this stops the current cpu). */
 	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+	pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
 	/* Restart func on the target cpu and stop the current cpu. */
 	if (lc) {
 		lc->restart_stack = stack;
@@ -332,12 +332,13 @@ static void pcpu_delegate(struct pcpu *pcpu,
 		lc->restart_data = (unsigned long)data;
 		lc->restart_source = source_cpu;
 	} else {
-		put_abs_lowcore(restart_stack, stack);
-		put_abs_lowcore(restart_fn, (unsigned long)func);
-		put_abs_lowcore(restart_data, (unsigned long)data);
-		put_abs_lowcore(restart_source, source_cpu);
+		abs_lc = get_abs_lowcore();
+		abs_lc->restart_stack = stack;
+		abs_lc->restart_fn = (unsigned long)func;
+		abs_lc->restart_data = (unsigned long)data;
+		abs_lc->restart_source = source_cpu;
+		put_abs_lowcore(abs_lc);
 	}
-	__bpon();
 	asm volatile(
 		"0:	sigp	0,%0,%2	# sigp restart to target cpu\n"
 		"	brc	2,0b	# busy, try again\n"
@@ -364,38 +365,22 @@ static int pcpu_set_smt(unsigned int mtid)
 		smp_cpu_mt_shift = 0;
 		while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
 			smp_cpu_mt_shift++;
-		pcpu_devices[0].address = stap();
+		per_cpu(pcpu_devices, 0).address = stap();
 	}
 	return cc;
 }
 
 /*
- * Call function on an online CPU.
- */
-void smp_call_online_cpu(void (*func)(void *), void *data)
-{
-	struct pcpu *pcpu;
-
-	/* Use the current cpu if it is online. */
-	pcpu = pcpu_find_address(cpu_online_mask, stap());
-	if (!pcpu)
-		/* Use the first online cpu. */
-		pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
-	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
-}
-
-/*
  * Call function on the ipl CPU.
  */
 void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
 	struct lowcore *lc = lowcore_ptr[0];
 
-	if (pcpu_devices[0].address == stap())
-		lc = &S390_lowcore;
+	if (ipl_pcpu->address == stap())
+		lc = get_lowcore();
 
-	pcpu_delegate(&pcpu_devices[0], func, data,
-		      lc->nodat_stack);
+	pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
 }
 
 int smp_find_processor_id(u16 address)
@@ -403,21 +388,21 @@ int smp_find_processor_id(u16 address)
 	int cpu;
 
 	for_each_present_cpu(cpu)
-		if (pcpu_devices[cpu].address == address)
+		if (per_cpu(pcpu_devices, cpu).address == address)
 			return cpu;
 	return -1;
 }
 
 void schedule_mcck_handler(void)
 {
-	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+	pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
 }
 
 bool notrace arch_vcpu_is_preempted(int cpu)
 {
 	if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
 		return false;
-	if (pcpu_running(pcpu_devices + cpu))
+	if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
 		return false;
 	return true;
 }
@@ -425,11 +410,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
 
 void notrace smp_yield_cpu(int cpu)
 {
-	if (!MACHINE_HAS_DIAG9C)
+	if (!machine_has_diag9c())
 		return;
 	diag_stat_inc_norecursion(DIAG_STAT_X09C);
 	asm volatile("diag %0,0,0x9c"
-		     : : "d" (pcpu_devices[cpu].address));
+		     : : "d" (per_cpu(pcpu_devices, cpu).address));
 }
 EXPORT_SYMBOL_GPL(smp_yield_cpu);
 
@@ -450,7 +435,7 @@ void notrace smp_emergency_stop(void)
 
 	end = get_tod_clock() + (1000000UL << 12);
 	for_each_cpu(cpu, &cpumask) {
-		struct pcpu *pcpu = pcpu_devices + cpu;
+		struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 		set_bit(ec_stop_cpu, &pcpu->ec_mask);
 		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
 				   0, NULL) == SIGP_CC_BUSY &&
@@ -459,7 +444,7 @@ void notrace smp_emergency_stop(void)
 	}
 	while (get_tod_clock() < end) {
 		for_each_cpu(cpu, &cpumask)
-			if (pcpu_stopped(pcpu_devices + cpu))
+			if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
 				cpumask_clear_cpu(cpu, &cpumask);
 		if (cpumask_empty(&cpumask))
 			break;
@@ -474,10 +459,11 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
  */
 void smp_send_stop(void)
 {
+	struct pcpu *pcpu;
 	int cpu;
 
 	/* Disable all interrupts/machine checks */
-	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+	__load_psw_mask(PSW_KERNEL_BITS);
 	trace_hardirqs_off();
 
 	debug_set_critical();
@@ -489,8 +475,9 @@ void smp_send_stop(void)
 	for_each_online_cpu(cpu) {
 		if (cpu == smp_processor_id())
 			continue;
-		pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
-		while (!pcpu_stopped(pcpu_devices + cpu))
+		pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+		pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+		while (!pcpu_stopped(pcpu))
 			cpu_relax();
 	}
 }
@@ -504,7 +491,7 @@ static void smp_handle_ext_call(void)
 	unsigned long bits;
 
 	/* handle bit signal external calls */
-	bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+	bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
 	if (test_bit(ec_stop_cpu, &bits))
 		smp_stop_cpu();
 	if (test_bit(ec_schedule, &bits))
@@ -512,7 +499,7 @@ static void smp_handle_ext_call(void)
 	if (test_bit(ec_call_function_single, &bits))
 		generic_smp_call_function_single_interrupt();
 	if (test_bit(ec_mcck_pending, &bits))
-		__s390_handle_mcck();
+		s390_handle_mcck();
 	if (test_bit(ec_irq_work, &bits))
 		irq_work_run();
 }
@@ -529,12 +516,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 	int cpu;
 
 	for_each_cpu(cpu, mask)
-		pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+		pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
 }
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+	pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
 }
 
 /*
@@ -542,63 +529,18 @@ void arch_send_call_function_single_ipi(int cpu)
  * it goes straight through and wastes no time serializing
  * anything. Worst case is that we lose a reschedule ...
  */
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
 {
-	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+	pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
 }
 
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
-	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+	pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
 }
 #endif
 
-/*
- * parameter area for the set/clear control bit callbacks
- */
-struct ec_creg_mask_parms {
-	unsigned long orval;
-	unsigned long andval;
-	int cr;
-};
-
-/*
- * callback for setting/clearing control bits
- */
-static void smp_ctl_bit_callback(void *info)
-{
-	struct ec_creg_mask_parms *pp = info;
-	unsigned long cregs[16];
-
-	__ctl_store(cregs, 0, 15);
-	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
-	__ctl_load(cregs, 0, 15);
-}
-
-static DEFINE_SPINLOCK(ctl_lock);
-
-void smp_ctl_set_clear_bit(int cr, int bit, bool set)
-{
-	struct ec_creg_mask_parms parms = { .cr = cr, };
-	u64 ctlreg;
-
-	if (set) {
-		parms.orval = 1UL << bit;
-		parms.andval = -1UL;
-	} else {
-		parms.orval = 0;
-		parms.andval = ~(1UL << bit);
-	}
-	spin_lock(&ctl_lock);
-	get_abs_lowcore(ctlreg, cregs_save_area[cr]);
-	ctlreg = (ctlreg & parms.andval) | parms.orval;
-	put_abs_lowcore(cregs_save_area[cr], ctlreg);
-	spin_unlock(&ctl_lock);
-	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
-}
-EXPORT_SYMBOL(smp_ctl_set_clear_bit);
-
 #ifdef CONFIG_CRASH_DUMP
 
 int smp_store_status(int cpu)
@@ -607,16 +549,16 @@ int smp_store_status(int cpu)
 	struct pcpu *pcpu;
 	unsigned long pa;
 
-	pcpu = pcpu_devices + cpu;
+	pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 	lc = lowcore_ptr[cpu];
 	pa = __pa(&lc->floating_pt_save_area);
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
 			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
 		return -EIO;
-	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
+	if (!cpu_has_vx() && !cpu_has_gs())
 		return 0;
 	pa = lc->mcesad & MCESA_ORIGIN_MASK;
-	if (MACHINE_HAS_GS)
+	if (cpu_has_gs())
 		pa |= lc->mcesad & MCESA_LC_MASK;
 	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
 			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
@@ -626,7 +568,7 @@ int smp_store_status(int cpu)
 
 /*
  * Collect CPU state of the previous, crashed system.
- * There are four cases:
+ * There are three cases:
  * 1) standard zfcp/nvme dump
  *    condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
  *    The state for all CPUs except the boot CPU needs to be collected
@@ -639,46 +581,45 @@ int smp_store_status(int cpu)
  *    with sigp stop-and-store-status. The firmware or the boot-loader
  *    stored the registers of the boot CPU in the absolute lowcore in the
  *    memory of the old system.
- * 3) kdump and the old kernel did not store the CPU state,
- *    or stand-alone kdump for DASD
- *    condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * 3) kdump or stand-alone kdump for DASD
+ *    condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false
  *    The state for all CPUs except the boot CPU needs to be collected
  *    with sigp stop-and-store-status. The kexec code or the boot-loader
  *    stored the registers of the boot CPU in the memory of the old system.
- * 4) kdump and the old kernel stored the CPU state
- *    condition: OLDMEM_BASE != NULL && is_kdump_kernel()
- *    This case does not exist for s390 anymore, setup_arch explicitly
- *    deactivates the elfcorehdr= kernel parameter
+ *
+ * Note that the legacy kdump mode where the old kernel stored the CPU states
+ * no longer exists: setup_arch() explicitly deactivates the elfcorehdr=
+ * kernel parameter. The is_kdump_kernel() implementation on s390 is independent
+ * of the elfcorehdr= parameter.
  */
-static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
-				     bool is_boot_cpu, __vector128 *vxrs)
+static bool dump_available(void)
 {
-	if (is_boot_cpu)
-		vxrs = boot_cpu_vector_save_area;
-	else
-		__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
-	save_area_add_vxrs(sa, vxrs);
+	return oldmem_data.start || is_ipl_type_dump();
 }
 
-static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
-				     bool is_boot_cpu, void *regs)
+void __init smp_save_dump_ipl_cpu(void)
 {
-	if (is_boot_cpu)
-		copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
-	else
-		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
+	struct save_area *sa;
+	void *regs;
+
+	if (!dump_available())
+		return;
+	sa = save_area_alloc(true);
+	regs = memblock_alloc_or_panic(512, 8);
+	copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
 	save_area_add_regs(sa, regs);
+	memblock_free(regs, 512);
+	if (cpu_has_vx())
+		save_area_add_vxrs(sa, boot_cpu_vector_save_area);
 }
 
-void __init smp_save_dump_cpus(void)
+void __init smp_save_dump_secondary_cpus(void)
 {
 	int addr, boot_cpu_addr, max_cpu_addr;
 	struct save_area *sa;
-	bool is_boot_cpu;
 	void *page;
 
-	if (!(oldmem_data.start || is_ipl_type_dump()))
-		/* No previous system present, normal boot. */
+	if (!dump_available())
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
 	page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
@@ -691,26 +632,18 @@ void __init smp_save_dump_cpus(void)
 	boot_cpu_addr = stap();
 	max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
 	for (addr = 0; addr <= max_cpu_addr; addr++) {
+		if (addr == boot_cpu_addr)
+			continue;
 		if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
 		    SIGP_CC_NOT_OPERATIONAL)
 			continue;
-		is_boot_cpu = (addr == boot_cpu_addr);
-		/* Allocate save area */
-		sa = save_area_alloc(is_boot_cpu);
-		if (!sa)
-			panic("could not allocate memory for save area\n");
-		if (MACHINE_HAS_VX)
-			/* Get the vector registers */
-			smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
-		/*
-		 * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
-		 * of the boot CPU are stored in the HSA. To retrieve
-		 * these registers an SCLP request is required which is
-		 * done by drivers/s390/char/zcore.c:init_cpu_info()
-		 */
-		if (!is_boot_cpu || oldmem_data.start)
-			/* Get the CPU registers */
-			smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+		sa = save_area_alloc(false);
+		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+		save_area_add_regs(sa, page);
+		if (cpu_has_vx()) {
+			__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+			save_area_add_vxrs(sa, page);
+		}
 	}
 	memblock_free(page, PAGE_SIZE);
 	diag_amode31_ops.diag308_reset();
@@ -720,17 +653,36 @@ void __init smp_save_dump_cpus(void)
 
 void smp_cpu_set_polarization(int cpu, int val)
 {
-	pcpu_devices[cpu].polarization = val;
+	per_cpu(pcpu_devices, cpu).polarization = val;
 }
 
 int smp_cpu_get_polarization(int cpu)
 {
-	return pcpu_devices[cpu].polarization;
+	return per_cpu(pcpu_devices, cpu).polarization;
+}
+
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+	per_cpu(pcpu_devices, cpu).capacity = val;
+}
+
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+	return per_cpu(pcpu_devices, cpu).capacity;
+}
+
+void smp_set_core_capacity(int cpu, unsigned long val)
+{
+	int i;
+
+	cpu = smp_get_base_cpu(cpu);
+	for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+		smp_cpu_set_capacity(i, val);
 }
 
 int smp_cpu_get_cpu_address(int cpu)
 {
-	return pcpu_devices[cpu].address;
+	return per_cpu(pcpu_devices, cpu).address;
 }
 
 static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@@ -754,8 +706,6 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
 	}
 }
 
-static int smp_add_present_cpu(int cpu);
-
 static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
 			bool configured, bool early)
 {
@@ -771,15 +721,16 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
 	for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
 		if (pcpu_find_address(cpu_present_mask, address + i))
 			continue;
-		pcpu = pcpu_devices + cpu;
+		pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 		pcpu->address = address + i;
 		if (configured)
 			pcpu->state = CPU_STATE_CONFIGURED;
 		else
 			pcpu->state = CPU_STATE_STANDBY;
 		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+		smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
 		set_cpu_present(cpu, true);
-		if (!early && smp_add_present_cpu(cpu) != 0)
+		if (!early && arch_register_cpu(cpu))
 			set_cpu_present(cpu, false);
 		else
 			nr++;
@@ -806,7 +757,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
 	 * that all SMT threads get subsequent logical CPU numbers.
 	 */
 	if (early) {
-		core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+		core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
 		for (i = 0; i < info->configured; i++) {
 			core = &info->core[i];
 			if (core->core_id == core_id) {
@@ -831,10 +782,7 @@ void __init smp_detect_cpus(void)
 	u16 address;
 
 	/* Get CPU information */
-	info = memblock_alloc(sizeof(*info), 8);
-	if (!info)
-		panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-		      __func__, sizeof(*info), 8);
+	info = memblock_alloc_or_panic(sizeof(*info), 8);
 	smp_get_core_info(info, 1);
 	/* Find boot CPU type */
 	if (sclp.has_core_type) {
@@ -853,6 +801,7 @@ void __init smp_detect_cpus(void)
 	mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
 	mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
 	pcpu_set_smt(mtid);
+	cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1);
 
 	/* Print number of CPUs */
 	c_cpus = s_cpus = 0;
@@ -866,9 +815,6 @@ void __init smp_detect_cpus(void)
 			s_cpus += smp_cpu_mtid + 1;
 	}
 	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
-
-	/* Add CPUs present at boot */
-	__smp_rescan_cpus(info, true);
 	memblock_free(info, sizeof(*info));
 }
 
@@ -877,17 +823,18 @@ void __init smp_detect_cpus(void)
  */
 static void smp_start_secondary(void *cpuvoid)
 {
+	struct lowcore *lc = get_lowcore();
 	int cpu = raw_smp_processor_id();
 
-	S390_lowcore.last_update_clock = get_tod_clock();
-	S390_lowcore.restart_stack = (unsigned long)restart_stack;
-	S390_lowcore.restart_fn = (unsigned long)do_restart;
-	S390_lowcore.restart_data = 0;
-	S390_lowcore.restart_source = -1U;
-	S390_lowcore.restart_flags = 0;
-	restore_access_regs(S390_lowcore.access_regs_save_area);
+	lc->last_update_clock = get_tod_clock();
+	lc->restart_stack = (unsigned long)restart_stack;
+	lc->restart_fn = (unsigned long)do_restart;
+	lc->restart_data = 0;
+	lc->restart_source = -1U;
+	lc->restart_flags = 0;
+	restore_access_regs(lc->access_regs_save_area);
 	cpu_init();
-	rcu_cpu_starting(cpu);
+	rcutree_report_cpu_starting(cpu);
 	init_cpu_timer();
 	vtime_init();
 	vdso_getcpu_init();
@@ -908,7 +855,7 @@ static void smp_start_secondary(void *cpuvoid)
 /* Upping and downing of CPUs */
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-	struct pcpu *pcpu = pcpu_devices + cpu;
+	struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 	int rc;
 
 	if (pcpu->state != CPU_STATE_CONFIGURED)
@@ -920,12 +867,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	rc = pcpu_alloc_lowcore(pcpu, cpu);
 	if (rc)
 		return rc;
+	/*
+	 * Make sure global control register contents do not change
+	 * until new CPU has initialized control registers.
+	 */
+	system_ctlreg_lock();
 	pcpu_prepare_secondary(pcpu, cpu);
-	pcpu_attach_task(pcpu, tidle);
-	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+	pcpu_attach_task(cpu, tidle);
+	pcpu_start_fn(cpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
 	while (!cpu_online(cpu))
 		cpu_relax();
+	system_ctlreg_unlock();
 	return 0;
 }
 
@@ -940,7 +893,7 @@ early_param("possible_cpus", _setup_possible_cpus);
 
 int __cpu_disable(void)
 {
-	unsigned long cregs[16];
+	struct ctlreg cregs[16];
 	int cpu;
 
 	/* Handle possible pending IPIs */
@@ -952,11 +905,11 @@ int __cpu_disable(void)
 	/* Disable pseudo page faults on this cpu. */
 	pfault_fini();
 	/* Disable interrupt sources via control register. */
-	__ctl_store(cregs, 0, 15);
-	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */
-	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
-	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
-	__ctl_load(cregs, 0, 15);
+	__local_ctl_store(0, 15, cregs);
+	cregs[0].val  &= ~0x0000ee70UL;	/* disable all external interrupts */
+	cregs[6].val  &= ~0xff000000UL;	/* disable all I/O interrupts */
+	cregs[14].val &= ~0x1f000000UL;	/* disable most machine checks */
+	__local_ctl_load(0, 15, cregs);
 	clear_cpu_flag(CIF_NOHZ_DELAY);
 	return 0;
 }
@@ -966,19 +919,19 @@ void __cpu_die(unsigned int cpu)
 	struct pcpu *pcpu;
 
 	/* Wait until target cpu is down */
-	pcpu = pcpu_devices + cpu;
+	pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 	while (!pcpu_stopped(pcpu))
 		cpu_relax();
-	pcpu_free_lowcore(pcpu);
+	pcpu_free_lowcore(pcpu, cpu);
 	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
 	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	pcpu->flags = 0;
 }
 
 void __noreturn cpu_die(void)
 {
 	idle_task_exit();
-	__bpon();
-	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+	pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
 	for (;;) ;
 }
 
@@ -997,30 +950,36 @@ void __init smp_fill_possible_mask(void)
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	/* request the 0x1201 emergency signal external interrupt */
 	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
 		panic("Couldn't request external interrupt 0x1201");
-	/* request the 0x1202 external call external interrupt */
+	system_ctl_set_bit(0, 14);
 	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
 		panic("Couldn't request external interrupt 0x1202");
+	system_ctl_set_bit(0, 13);
+	smp_rescan_cpus(true);
 }
 
 void __init smp_prepare_boot_cpu(void)
 {
-	struct pcpu *pcpu = pcpu_devices;
+	struct lowcore *lc = get_lowcore();
 
 	WARN_ON(!cpu_present(0) || !cpu_online(0));
-	pcpu->state = CPU_STATE_CONFIGURED;
-	S390_lowcore.percpu_offset = __per_cpu_offset[0];
+	lc->percpu_offset = __per_cpu_offset[0];
+	ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+	ipl_pcpu->state = CPU_STATE_CONFIGURED;
+	lc->pcpu = (unsigned long)ipl_pcpu;
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+	smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
 }
 
 void __init smp_setup_processor_id(void)
 {
-	pcpu_devices[0].address = stap();
-	S390_lowcore.cpu_nr = 0;
-	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
-	S390_lowcore.spinlock_index = 0;
+	struct lowcore *lc = get_lowcore();
+
+	lc->cpu_nr = 0;
+	per_cpu(pcpu_devices, 0).address = stap();
+	lc->spinlock_lockval = arch_spin_lockval(0);
+	lc->spinlock_index = 0;
 }
 
 /*
@@ -1040,7 +999,7 @@ static ssize_t cpu_configure_show(struct device *dev,
 	ssize_t count;
 
 	mutex_lock(&smp_cpu_state_mutex);
-	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+	count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
 	mutex_unlock(&smp_cpu_state_mutex);
 	return count;
 }
@@ -1060,15 +1019,13 @@ static ssize_t cpu_configure_store(struct device *dev,
 	cpus_read_lock();
 	mutex_lock(&smp_cpu_state_mutex);
 	rc = -EBUSY;
-	/* disallow configuration changes of online cpus and cpu 0 */
+	/* disallow configuration changes of online cpus */
 	cpu = dev->id;
 	cpu = smp_get_base_cpu(cpu);
-	if (cpu == 0)
-		goto out;
 	for (i = 0; i <= smp_cpu_mtid; i++)
 		if (cpu_online(cpu + i))
 			goto out;
-	pcpu = pcpu_devices + cpu;
+	pcpu = per_cpu_ptr(&pcpu_devices, cpu);
 	rc = 0;
 	switch (val) {
 	case 0:
@@ -1080,7 +1037,7 @@ static ssize_t cpu_configure_store(struct device *dev,
 		for (i = 0; i <= smp_cpu_mtid; i++) {
 			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
 				continue;
-			pcpu[i].state = CPU_STATE_STANDBY;
+			per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
 			smp_cpu_set_polarization(cpu + i,
 						 POLARIZATION_UNKNOWN);
 		}
@@ -1095,7 +1052,7 @@ static ssize_t cpu_configure_store(struct device *dev,
 		for (i = 0; i <= smp_cpu_mtid; i++) {
 			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
 				continue;
-			pcpu[i].state = CPU_STATE_CONFIGURED;
+			per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
 			smp_cpu_set_polarization(cpu + i,
 						 POLARIZATION_UNKNOWN);
 		}
@@ -1114,7 +1071,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
 static ssize_t show_cpu_address(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+	return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
 }
 static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
 
@@ -1140,35 +1097,34 @@ static struct attribute_group cpu_online_attr_group = {
 
 static int smp_cpu_online(unsigned int cpu)
 {
-	struct device *s = &per_cpu(cpu_device, cpu)->dev;
+	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
 
-	return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+	return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
 }
 
 static int smp_cpu_pre_down(unsigned int cpu)
 {
-	struct device *s = &per_cpu(cpu_device, cpu)->dev;
+	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
 
-	sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+	sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
 	return 0;
 }
 
-static int smp_add_present_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
+{
+	return !!cpu;
+}
+
+int arch_register_cpu(int cpu)
 {
-	struct device *s;
-	struct cpu *c;
+	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
 	int rc;
 
-	c = kzalloc(sizeof(*c), GFP_KERNEL);
-	if (!c)
-		return -ENOMEM;
-	per_cpu(cpu_device, cpu) = c;
-	s = &c->dev;
-	c->hotpluggable = 1;
+	c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
 	rc = register_cpu(c, cpu);
 	if (rc)
 		goto out;
-	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+	rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
 	if (rc)
 		goto out_cpu;
 	rc = topology_cpu_init(c);
@@ -1177,14 +1133,14 @@ static int smp_add_present_cpu(int cpu)
 	return 0;
 
 out_topology:
-	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+	sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
 out_cpu:
 	unregister_cpu(c);
 out:
 	return rc;
 }
 
-int __ref smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(bool early)
 {
 	struct sclp_core_info *info;
 	int nr;
@@ -1193,7 +1149,7 @@ int __ref smp_rescan_cpus(void)
 	if (!info)
 		return -ENOMEM;
 	smp_get_core_info(info, 0);
-	nr = __smp_rescan_cpus(info, false);
+	nr = __smp_rescan_cpus(info, early);
 	kfree(info);
 	if (nr)
 		topology_schedule_update();
@@ -1210,7 +1166,7 @@ static ssize_t __ref rescan_store(struct device *dev,
 	rc = lock_device_hotplug_sysfs();
 	if (rc)
 		return rc;
-	rc = smp_rescan_cpus();
+	rc = smp_rescan_cpus(false);
 	unlock_device_hotplug();
 	return rc ? rc : count;
 }
@@ -1218,77 +1174,19 @@ static DEVICE_ATTR_WO(rescan);
 
 static int __init s390_smp_init(void)
 {
-	int cpu, rc = 0;
+	struct device *dev_root;
+	int rc;
 
-	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
-	if (rc)
-		return rc;
-	for_each_present_cpu(cpu) {
-		rc = smp_add_present_cpu(cpu);
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		rc = device_create_file(dev_root, &dev_attr_rescan);
+		put_device(dev_root);
 		if (rc)
-			goto out;
+			return rc;
 	}
-
 	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
 			       smp_cpu_online, smp_cpu_pre_down);
 	rc = rc <= 0 ? rc : 0;
-out:
 	return rc;
 }
 subsys_initcall(s390_smp_init);
-
-static __always_inline void set_new_lowcore(struct lowcore *lc)
-{
-	union register_pair dst, src;
-	u32 pfx;
-
-	src.even = (unsigned long) &S390_lowcore;
-	src.odd  = sizeof(S390_lowcore);
-	dst.even = (unsigned long) lc;
-	dst.odd  = sizeof(*lc);
-	pfx = __pa(lc);
-
-	asm volatile(
-		"	mvcl	%[dst],%[src]\n"
-		"	spx	%[pfx]\n"
-		: [dst] "+&d" (dst.pair), [src] "+&d" (src.pair)
-		: [pfx] "Q" (pfx)
-		: "memory", "cc");
-}
-
-static int __init smp_reinit_ipl_cpu(void)
-{
-	unsigned long async_stack, nodat_stack, mcck_stack;
-	struct lowcore *lc, *lc_ipl;
-	unsigned long flags, cr0;
-	u64 mcesad;
-
-	lc_ipl = lowcore_ptr[0];
-	lc = (struct lowcore *)	__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
-	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-	async_stack = stack_alloc();
-	mcck_stack = stack_alloc();
-	if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
-		panic("Couldn't allocate memory");
-
-	local_irq_save(flags);
-	local_mcck_disable();
-	set_new_lowcore(lc);
-	S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
-	S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
-	S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
-	__ctl_store(cr0, 0, 0);
-	__ctl_clear_bit(0, 28); /* disable lowcore protection */
-	S390_lowcore.mcesad = mcesad;
-	__ctl_load(cr0, 0, 0);
-	lowcore_ptr[0] = lc;
-	local_mcck_enable();
-	local_irq_restore(flags);
-
-	free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER);
-	memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE);
-	memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl));
-
-	return 0;
-}
-early_initcall(smp_reinit_ipl_cpu);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 7ee455e8e3d5..b153a395f46d 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -5,10 +5,15 @@
  *  Copyright IBM Corp. 2006
  */
 
+#include <linux/perf_event.h>
 #include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 #include <asm/kprobes.h>
+#include <asm/ptrace.h>
 
 void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 		     struct task_struct *task, struct pt_regs *regs)
@@ -40,12 +45,12 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 		if (!addr)
 			return -EINVAL;
 
-#ifdef CONFIG_KPROBES
+#ifdef CONFIG_RETHOOK
 		/*
-		 * Mark stacktraces with kretprobed functions on them
+		 * Mark stacktraces with krethook functions on them
 		 * as unreliable.
 		 */
-		if (state.ip == (unsigned long)__kretprobe_trampoline)
+		if (state.ip == (unsigned long)arch_rethook_trampoline)
 			return -EINVAL;
 #endif
 
@@ -58,3 +63,103 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 		return -EINVAL;
 	return 0;
 }
+
+static inline bool store_ip(stack_trace_consume_fn consume_entry, void *cookie,
+			    struct perf_callchain_entry_ctx *entry, bool perf,
+			    unsigned long ip)
+{
+#ifdef CONFIG_PERF_EVENTS
+	if (perf) {
+		if (perf_callchain_store(entry, ip))
+			return false;
+		return true;
+	}
+#endif
+	return consume_entry(cookie, ip);
+}
+
+static inline bool ip_invalid(unsigned long ip)
+{
+	/*
+	 * Perform some basic checks to detect an invalid instruction
+	 * address taken from an unreliable source.
+	 */
+	if (ip & 1)
+		return true;
+	if (ip < mmap_min_addr)
+		return true;
+	if (ip >= current->mm->context.asce_limit)
+		return true;
+	return false;
+}
+
+static inline bool ip_within_vdso(unsigned long ip)
+{
+	return in_range(ip, current->mm->context.vdso_base, vdso_text_size());
+}
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+				 struct perf_callchain_entry_ctx *entry,
+				 const struct pt_regs *regs, bool perf)
+{
+	struct stack_frame_vdso_wrapper __user *sf_vdso;
+	struct stack_frame_user __user *sf;
+	unsigned long ip, sp;
+	bool first = true;
+
+	if (is_compat_task())
+		return;
+	if (!current->mm)
+		return;
+	ip = instruction_pointer(regs);
+	if (!store_ip(consume_entry, cookie, entry, perf, ip))
+		return;
+	sf = (void __user *)user_stack_pointer(regs);
+	pagefault_disable();
+	while (1) {
+		if (__get_user(sp, &sf->back_chain))
+			break;
+		/*
+		 * VDSO entry code has a non-standard stack frame layout.
+		 * See VDSO user wrapper code for details.
+		 */
+		if (!sp && ip_within_vdso(ip)) {
+			sf_vdso = (void __user *)sf;
+			if (__get_user(ip, &sf_vdso->return_address))
+				break;
+			sp = (unsigned long)sf + STACK_FRAME_VDSO_OVERHEAD;
+			sf = (void __user *)sp;
+			if (__get_user(sp, &sf->back_chain))
+				break;
+		} else {
+			sf = (void __user *)sp;
+			if (__get_user(ip, &sf->gprs[8]))
+				break;
+		}
+		/* Sanity check: ABI requires SP to be 8 byte aligned. */
+		if (sp & 0x7)
+			break;
+		if (ip_invalid(ip)) {
+			/*
+			 * If the instruction address is invalid, and this
+			 * is the first stack frame, assume r14 has not
+			 * been written to the stack yet. Otherwise exit.
+			 */
+			if (!first)
+				break;
+			ip = regs->gprs[14];
+			if (ip_invalid(ip))
+				break;
+		}
+		if (!store_ip(consume_entry, cookie, entry, perf, ip))
+			break;
+		first = false;
+	}
+	pagefault_enable();
+}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+			  const struct pt_regs *regs)
+{
+	arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
+}
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 4d141e2c132e..d40f0b983e74 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -17,6 +17,7 @@
 #include <asm/ebcdic.h>
 #include <asm/facility.h>
 #include <asm/sthyi.h>
+#include <asm/asm.h>
 #include "entry.h"
 
 #define DED_WEIGHT 0xffff
@@ -300,32 +301,57 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
 	return (struct diag204_x_part_block *)&block->cpus[i];
 }
 
-static void fill_diag(struct sthyi_sctns *sctns)
+static void *diag204_get_data(bool diag204_allow_busy)
 {
-	int i, r, pages;
-	bool this_lpar;
+	unsigned long subcode;
 	void *diag204_buf;
+	int pages, rc;
+
+	subcode = DIAG204_SUBC_RSI;
+	subcode |= DIAG204_INFO_EXT;
+	pages = diag204(subcode, 0, NULL);
+	if (pages < 0)
+		return ERR_PTR(pages);
+	if (pages == 0)
+		return ERR_PTR(-ENODATA);
+	diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
+				     PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+				     __builtin_return_address(0));
+	if (!diag204_buf)
+		return ERR_PTR(-ENOMEM);
+	subcode = DIAG204_SUBC_STIB7;
+	subcode |= DIAG204_INFO_EXT;
+	if (diag204_has_bif() && diag204_allow_busy)
+		subcode |= DIAG204_BIF_BIT;
+	rc = diag204(subcode, pages, diag204_buf);
+	if (rc < 0) {
+		vfree(diag204_buf);
+		return ERR_PTR(rc);
+	}
+	return diag204_buf;
+}
+
+static bool is_diag204_cached(struct sthyi_sctns *sctns)
+{
+	/*
+	 * Check if validity bits are set when diag204 data
+	 * is gathered.
+	 */
+	if (sctns->par.infpval1)
+		return true;
+	return false;
+}
+
+static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
+{
+	int i;
+	bool this_lpar;
 	void *diag224_buf = NULL;
 	struct diag204_x_info_blk_hdr *ti_hdr;
 	struct diag204_x_part_block *part_block;
 	struct diag204_x_phys_block *phys_block;
 	struct lpar_cpu_inf lpar_inf = {};
 
-	/* Errors are handled through the validity bits in the response. */
-	pages = diag204((unsigned long)DIAG204_SUBC_RSI |
-			(unsigned long)DIAG204_INFO_EXT, 0, NULL);
-	if (pages <= 0)
-		return;
-
-	diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
-	if (!diag204_buf)
-		return;
-
-	r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
-		    (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
-	if (r < 0)
-		goto out;
-
 	diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
 	if (!diag224_buf || diag224(diag224_buf))
 		goto out;
@@ -390,7 +416,6 @@ static void fill_diag(struct sthyi_sctns *sctns)
 
 out:
 	free_page((unsigned long)diag224_buf);
-	vfree(diag204_buf);
 }
 
 static int sthyi(u64 vaddr, u64 *rc)
@@ -401,30 +426,41 @@ static int sthyi(u64 vaddr, u64 *rc)
 
 	asm volatile(
 		".insn   rre,0xB2560000,%[r1],%[r2]\n"
-		"ipm     %[cc]\n"
-		"srl     %[cc],28\n"
-		: [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [r2] "+&d" (r2.pair)
 		: [r1] "d" (r1.pair)
-		: "memory", "cc");
+		: CC_CLOBBER_LIST("memory"));
 	*rc = r2.odd;
-	return cc;
+	return CC_TRANSFORM(cc);
 }
 
 static int fill_dst(void *dst, u64 *rc)
 {
+	void *diag204_buf;
+
 	struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
 
 	/*
 	 * If the facility is on, we don't want to emulate the instruction.
 	 * We ask the hypervisor to provide the data.
 	 */
-	if (test_facility(74))
+	if (test_facility(74)) {
+		memset(dst, 0, PAGE_SIZE);
 		return sthyi((u64)dst, rc);
-
+	}
+	/*
+	 * When emulating, if diag204 returns BUSY don't reset dst buffer
+	 * and use cached data.
+	 */
+	*rc = 0;
+	diag204_buf = diag204_get_data(is_diag204_cached(sctns));
+	if (IS_ERR(diag204_buf))
+		return PTR_ERR(diag204_buf);
+	memset(dst, 0, PAGE_SIZE);
 	fill_hdr(sctns);
 	fill_stsi(sctns);
-	fill_diag(sctns);
-	*rc = 0;
+	fill_diag(sctns, diag204_buf);
+	vfree(diag204_buf);
 	return 0;
 }
 
@@ -443,11 +479,14 @@ static int sthyi_update_cache(u64 *rc)
 {
 	int r;
 
-	memset(sthyi_cache.info, 0, PAGE_SIZE);
 	r = fill_dst(sthyi_cache.info, rc);
-	if (r)
-		return r;
-	sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+	if (r == 0) {
+		sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+	} else if (r == -EBUSY) {
+		/* mark as expired and return 0 to keep using cached data */
+		sthyi_cache.end = jiffies - 1;
+		r = 0;
+	}
 	return r;
 }
 
@@ -459,9 +498,9 @@ static int sthyi_update_cache(u64 *rc)
  *
  * Fills the destination with system information returned by the STHYI
  * instruction. The data is generated by emulation or execution of STHYI,
- * if available. The return value is the condition code that would be
- * returned, the rc parameter is the return code which is passed in
- * register R2 + 1.
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned, the rc parameter is the
+ * return code which is passed in register R2 + 1.
  */
 int sthyi_fill(void *dst, u64 *rc)
 {
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..4fee74553ca2 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -12,6 +12,7 @@
  *  platform.
  */
 
+#include <linux/cpufeature.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -38,33 +39,6 @@
 
 #include "entry.h"
 
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
-	struct s390_mmap_arg_struct a;
-	int error = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-	error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
-	return error;
-}
-
 #ifdef CONFIG_SYSVIPC
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls.
@@ -108,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall)
 	return -ENOSYS;
 }
 
-static void do_syscall(struct pt_regs *regs)
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
 {
 	unsigned long nr;
 
+	add_random_kstack_offset();
+	enter_from_user_mode(regs);
+	regs->psw = get_lowcore()->svc_old_psw;
+	regs->int_code = get_lowcore()->svc_int_code;
+	update_timer_sys();
+	if (cpu_has_bear())
+		current->thread.last_break = regs->last_break;
+	local_irq_enable();
+	regs->orig_gpr2 = regs->gprs[2];
+	if (unlikely(per_trap))
+		set_thread_flag(TIF_PER_TRAP);
+	regs->flags = 0;
+	set_pt_regs_flag(regs, PIF_SYSCALL);
 	nr = regs->int_code & 0xffff;
-	if (!nr) {
+	if (likely(!nr)) {
 		nr = regs->gprs[1] & 0xffff;
 		regs->int_code &= ~0xffffUL;
 		regs->int_code |= nr;
 	}
-
 	regs->gprs[2] = nr;
-
 	if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
 		regs->psw.addr = current->restart_block.arch_data;
 		current->restart_block.arch_data = 1;
 	}
 	nr = syscall_enter_from_user_mode_work(regs, nr);
-
 	/*
 	 * In the s390 ptrace ABI, both the syscall number and the return value
 	 * use gpr2. However, userspace puts the syscall number either in the
@@ -134,37 +118,11 @@ static void do_syscall(struct pt_regs *regs)
 	 * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
 	 * and if set, the syscall will be skipped.
 	 */
-
 	if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
 		goto out;
 	regs->gprs[2] = -ENOSYS;
-	if (likely(nr >= NR_syscalls))
-		goto out;
-	do {
+	if (likely(nr < NR_syscalls))
 		regs->gprs[2] = current->thread.sys_call_table[nr](regs);
-	} while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART));
 out:
-	syscall_exit_to_user_mode_work(regs);
-}
-
-void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
-{
-	add_random_kstack_offset();
-	enter_from_user_mode(regs);
-	regs->psw = S390_lowcore.svc_old_psw;
-	regs->int_code = S390_lowcore.svc_int_code;
-	update_timer_sys();
-	if (static_branch_likely(&cpu_has_bear))
-		current->thread.last_break = regs->last_break;
-
-	local_irq_enable();
-	regs->orig_gpr2 = regs->gprs[2];
-
-	if (per_trap)
-		set_thread_flag(TIF_PER_TRAP);
-
-	regs->flags = 0;
-	set_pt_regs_flag(regs, PIF_SYSCALL);
-	do_syscall(regs);
-	exit_to_user_mode();
+	syscall_exit_to_user_mode(regs);
 }
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index fb85e797946d..c5d958a09ff4 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -4,15 +4,15 @@ gen	:= arch/$(ARCH)/include/generated
 kapi	:= $(gen)/asm
 uapi	:= $(gen)/uapi/asm
 
-syscall	:= $(srctree)/$(src)/syscall.tbl
-systbl	:= $(srctree)/$(src)/syscalltbl
+syscall	:= $(src)/syscall.tbl
+systbl	:= $(src)/syscalltbl
 
 gen-y := $(kapi)/syscall_table.h
 kapi-hdrs-y := $(kapi)/unistd_nr.h
 uapi-hdrs-y := $(uapi)/unistd_32.h
 uapi-hdrs-y += $(uapi)/unistd_64.h
 
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
+targets += $(addprefix ../../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
 
 PHONY += kapi uapi
 
@@ -23,23 +23,26 @@ uapi:	$(uapi-hdrs-y)
 # Create output directory if not already present
 $(shell mkdir -p $(uapi) $(kapi))
 
-filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
+quiet_cmd_syshdr = SYSHDR  $@
+      cmd_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$@" < $< > $@
 
-filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
+quiet_cmd_sysnr = SYSNR   $@
+      cmd_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< > $@
 
-filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $<
+quiet_cmd_syscalls = SYSTBL  $@
+      cmd_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< > $@
 
 syshdr_abi_unistd_32 := common,32
-$(uapi)/unistd_32.h: $(syscall) FORCE
-	$(call filechk,syshdr,$@)
+$(uapi)/unistd_32.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall) FORCE
-	$(call filechk,syshdr,$@)
+$(uapi)/unistd_64.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,syshdr)
 
-$(kapi)/syscall_table.h: $(syscall) FORCE
-	$(call filechk,syscalls)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,syscalls)
 
 sysnr_abi_unistd_nr := common,32,64
-$(kapi)/unistd_nr.h: $(syscall) FORCE
-	$(call filechk,sysnr)
+$(kapi)/unistd_nr.h: $(syscall) $(systbl) FORCE
+	$(call if_changed,sysnr)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 799147658dee..a4569b96ef06 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -100,7 +100,7 @@
 106  common	stat			sys_newstat			compat_sys_newstat
 107  common	lstat			sys_newlstat			compat_sys_newlstat
 108  common	fstat			sys_newfstat			compat_sys_newfstat
-110  common	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
+110  common	lookup_dcookie		-				-
 111  common	vhangup			sys_vhangup			sys_vhangup
 112  common	idle			-				-
 114  common	wait4			sys_wait4			compat_sys_wait4
@@ -418,7 +418,7 @@
 412	32	utimensat_time64	-				sys_utimensat
 413	32	pselect6_time64		-				compat_sys_pselect6_time64
 414	32	ppoll_time64		-				compat_sys_ppoll_time64
-416	32	io_pgetevents_time64	-				sys_io_pgetevents
+416	32	io_pgetevents_time64	-				compat_sys_io_pgetevents_time64
 417	32	recvmmsg_time64		-				compat_sys_recvmmsg_time64
 418	32	mq_timedsend_time64	-				sys_mq_timedsend
 419	32	mq_timedreceive_time64	-				sys_mq_timedreceive
@@ -449,7 +449,24 @@
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
-# 447 reserved for memfd_secret
+447  common	memfd_secret		sys_memfd_secret		sys_memfd_secret
 448  common	process_mrelease	sys_process_mrelease		sys_process_mrelease
 449  common	futex_waitv		sys_futex_waitv			sys_futex_waitv
 450  common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node
+451  common	cachestat		sys_cachestat			sys_cachestat
+452  common	fchmodat2		sys_fchmodat2			sys_fchmodat2
+453  common	map_shadow_stack	sys_map_shadow_stack		sys_map_shadow_stack
+454  common	futex_wake		sys_futex_wake			sys_futex_wake
+455  common	futex_wait		sys_futex_wait			sys_futex_wait
+456  common	futex_requeue		sys_futex_requeue		sys_futex_requeue
+457  common	statmount		sys_statmount			sys_statmount
+458  common	listmount		sys_listmount			sys_listmount
+459  common	lsm_get_self_attr	sys_lsm_get_self_attr		sys_lsm_get_self_attr
+460  common	lsm_set_self_attr	sys_lsm_set_self_attr		sys_lsm_set_self_attr
+461  common	lsm_list_modules	sys_lsm_list_modules		sys_lsm_list_modules
+462  common	mseal			sys_mseal			sys_mseal
+463  common	setxattrat		sys_setxattrat			sys_setxattrat
+464  common	getxattrat		sys_getxattrat			sys_getxattrat
+465  common	listxattrat		sys_listxattrat			sys_listxattrat
+466  common	removexattrat		sys_removexattrat		sys_removexattrat
+467  common	open_tree_attr		sys_open_tree_attr		sys_open_tree_attr
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index b5e364358ce4..1ea84e942bd4 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -5,6 +5,7 @@
  *	       Martin Schwidefsky <schwidefsky@de.ibm.com>,
  */
 
+#include <linux/cpufeature.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -15,54 +16,17 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <asm/asm-extable.h>
+#include <asm/machine.h>
 #include <asm/ebcdic.h>
 #include <asm/debug.h>
 #include <asm/sysinfo.h>
 #include <asm/cpcmd.h>
 #include <asm/topology.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
+#include <asm/asm.h>
 
 int topology_max_mnest;
 
-static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
-{
-	int r0 = (fc << 28) | sel1;
-	int rc = 0;
-
-	asm volatile(
-		"	lr	0,%[r0]\n"
-		"	lr	1,%[r1]\n"
-		"	stsi	0(%[sysinfo])\n"
-		"0:	jz	2f\n"
-		"1:	lhi	%[rc],%[retval]\n"
-		"2:	lr	%[r0],0\n"
-		EX_TABLE(0b, 1b)
-		: [r0] "+d" (r0), [rc] "+d" (rc)
-		: [r1] "d" (sel2),
-		  [sysinfo] "a" (sysinfo),
-		  [retval] "K" (-EOPNOTSUPP)
-		: "cc", "0", "1", "memory");
-	*lvl = ((unsigned int) r0) >> 28;
-	return rc;
-}
-
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
-	int lvl, rc;
-
-	rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
-	if (rc)
-		return rc;
-	return fc ? 0 : lvl;
-}
-EXPORT_SYMBOL(stsi);
-
 #ifdef CONFIG_PROC_FS
 
 static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
@@ -81,10 +45,12 @@ static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
 
 static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
 {
+	bool has_var_cap;
 	int i;
 
 	if (stsi(info, 1, 1, 1))
 		return;
+	has_var_cap = !!info->model_var_cap[0];
 	EBCASC(info->manufacturer, sizeof(info->manufacturer));
 	EBCASC(info->type, sizeof(info->type));
 	EBCASC(info->model, sizeof(info->model));
@@ -93,6 +59,8 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
 	EBCASC(info->model_capacity, sizeof(info->model_capacity));
 	EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
 	EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+	if (has_var_cap)
+		EBCASC(info->model_var_cap, sizeof(info->model_var_cap));
 	seq_printf(m, "Manufacturer:         %-16.16s\n", info->manufacturer);
 	seq_printf(m, "Type:                 %-4.4s\n", info->type);
 	if (info->lic)
@@ -120,12 +88,18 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
 		seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
 			   info->model_temp_cap,
 			   info->model_temp_cap_rating);
+	if (has_var_cap && info->model_var_cap_rating)
+		seq_printf(m, "Model Var. Capacity:  %-16.16s %08u\n",
+			   info->model_var_cap,
+			   info->model_var_cap_rating);
 	if (info->ncr)
 		seq_printf(m, "Nominal Cap. Rating:  %08u\n", info->ncr);
 	if (info->npr)
 		seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
 	if (info->ntr)
 		seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+	if (has_var_cap && info->nvr)
+		seq_printf(m, "Nominal Var. Rating:  %08u\n", info->nvr);
 	if (info->cai) {
 		seq_printf(m, "Capacity Adj. Ind.:   %d\n", info->cai);
 		seq_printf(m, "Capacity Ch. Reason:  %d\n", info->ccr);
@@ -144,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
 	int i;
 
 	seq_putc(m, '\n');
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (!cpu_has_topology())
 		return;
 	if (stsi(info, 15, 1, topology_max_mnest))
 		return;
@@ -387,7 +361,7 @@ static void service_level_vm_print(struct seq_file *m,
 {
 	char *query_buffer, *str;
 
-	query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+	query_buffer = kmalloc(1024, GFP_KERNEL);
 	if (!query_buffer)
 		return;
 	cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
@@ -405,7 +379,7 @@ static struct service_level service_level_vm = {
 static __init int create_proc_service_level(void)
 {
 	proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
-	if (MACHINE_IS_VM)
+	if (machine_is_vm())
 		register_service_level(&service_level_vm);
 	return 0;
 }
@@ -416,9 +390,9 @@ subsys_initcall(create_proc_service_level);
  */
 void s390_adjust_jiffies(void)
 {
+	DECLARE_KERNEL_FPU_ONSTACK16(fpu);
 	struct sysinfo_1_2_2 *info;
 	unsigned long capability;
-	struct kernel_fpu fpu;
 
 	info = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!info)
@@ -437,21 +411,14 @@ void s390_adjust_jiffies(void)
 		 * point division ..
 		 */
 		kernel_fpu_begin(&fpu, KERNEL_FPR);
-		asm volatile(
-			"	sfpc	%3\n"
-			"	l	%0,%1\n"
-			"	tmlh	%0,0xff80\n"
-			"	jnz	0f\n"
-			"	cefbr	%%f2,%0\n"
-			"	j	1f\n"
-			"0:	le	%%f2,%1\n"
-			"1:	cefbr	%%f0,%2\n"
-			"	debr	%%f0,%%f2\n"
-			"	cgebr	%0,5,%%f0\n"
-			: "=&d" (capability)
-			: "Q" (info->capability), "d" (10000000), "d" (0)
-			: "cc"
-			);
+		fpu_sfpc(0);
+		if (info->capability & 0xff800000)
+			fpu_ldgr(2, info->capability);
+		else
+			fpu_cefbr(2, info->capability);
+		fpu_cefbr(0, 10000000);
+		fpu_debr(0, 2);
+		capability = fpu_cgebr(0, 5);
 		kernel_fpu_end(&fpu, KERNEL_FPR);
 	} else
 		/*
@@ -495,7 +462,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = {       \
 	.open		= stsi_open_##fc##_##s1##_##s2,			       \
 	.release	= stsi_release,					       \
 	.read		= stsi_read,					       \
-	.llseek		= no_llseek,					       \
 };
 
 static int stsi_release(struct inode *inode, struct file *file)
@@ -557,7 +523,7 @@ static __init int stsi_init_debugfs(void)
 		sf = &stsi_file[i];
 		debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
 	}
-	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) {
 		char link_to[10];
 
 		sprintf(link_to, "15_1_%d", topology_mnest_limit());
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index 2c8b14cc5556..26f2981aa09e 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -18,8 +18,7 @@
  * affects a few functions that are not performance-relevant.
  */
 	.macro BR_EX_AMODE31_r14
-	larl	%r1,0f
-	ex	0,0(%r1)
+	exrl	0,0f
 	j	.
 0:	br	%r14
 	.endm
@@ -27,7 +26,7 @@
 /*
  * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode)
  */
-ENTRY(_diag14_amode31)
+SYM_FUNC_START(_diag14_amode31)
 	lgr	%r1,%r2
 	lgr	%r2,%r3
 	lgr	%r3,%r4
@@ -42,12 +41,12 @@ ENTRY(_diag14_amode31)
 	lgfr	%r2,%r5
 	BR_EX_AMODE31_r14
 	EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault)
-ENDPROC(_diag14_amode31)
+SYM_FUNC_END(_diag14_amode31)
 
 /*
  * int _diag210_amode31(struct diag210 *addr)
  */
-ENTRY(_diag210_amode31)
+SYM_FUNC_START(_diag210_amode31)
 	lgr	%r1,%r2
 	lhi	%r2,-1
 	sam31
@@ -60,12 +59,25 @@ ENTRY(_diag210_amode31)
 	lgfr	%r2,%r2
 	BR_EX_AMODE31_r14
 	EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault)
-ENDPROC(_diag210_amode31)
+SYM_FUNC_END(_diag210_amode31)
 
 /*
+ * int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
+ */
+SYM_FUNC_START(_diag8c_amode31)
+	llgf	%r3,0(%r3)
+	sam31
+	diag	%r2,%r4,0x8c
+.Ldiag8c_ex:
+	sam64
+	lgfr	%r2,%r3
+	BR_EX_AMODE31_r14
+	EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
+SYM_FUNC_END(_diag8c_amode31)
+/*
  * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
  */
-ENTRY(_diag26c_amode31)
+SYM_FUNC_START(_diag26c_amode31)
 	lghi	%r5,-EOPNOTSUPP
 	sam31
 	diag	%r2,%r4,0x26c
@@ -74,42 +86,42 @@ ENTRY(_diag26c_amode31)
 	lgfr	%r2,%r5
 	BR_EX_AMODE31_r14
 	EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex)
-ENDPROC(_diag26c_amode31)
+SYM_FUNC_END(_diag26c_amode31)
 
 /*
- * void _diag0c_amode31(struct hypfs_diag0c_entry *entry)
+ * void _diag0c_amode31(unsigned long rx)
  */
-ENTRY(_diag0c_amode31)
+SYM_FUNC_START(_diag0c_amode31)
 	sam31
 	diag	%r2,%r2,0x0c
 	sam64
 	BR_EX_AMODE31_r14
-ENDPROC(_diag0c_amode31)
+SYM_FUNC_END(_diag0c_amode31)
 
 /*
  * void _diag308_reset_amode31(void)
  *
  * Calls diag 308 subcode 1 and continues execution
  */
-ENTRY(_diag308_reset_amode31)
-	larl	%r4,.Lctlregs		# Save control registers
+SYM_FUNC_START(_diag308_reset_amode31)
+	larl	%r4,ctlregs		# Save control registers
 	stctg	%c0,%c15,0(%r4)
 	lg	%r2,0(%r4)		# Disable lowcore protection
 	nilh	%r2,0xefff
-	larl	%r4,.Lctlreg0
+	larl	%r4,ctlreg0
 	stg	%r2,0(%r4)
 	lctlg	%c0,%c0,0(%r4)
-	larl	%r4,.Lfpctl		# Floating point control register
+	larl	%r4,fpctl		# Floating point control register
 	stfpc	0(%r4)
-	larl	%r4,.Lprefix		# Save prefix register
+	larl	%r4,prefix		# Save prefix register
 	stpx	0(%r4)
-	larl	%r4,.Lprefix_zero	# Set prefix register to 0
+	larl	%r4,prefix_zero	# Set prefix register to 0
 	spx	0(%r4)
-	larl	%r4,.Lcontinue_psw	# Save PSW flags
+	larl	%r4,continue_psw	# Save PSW flags
 	epsw	%r2,%r3
 	stm	%r2,%r3,0(%r4)
 	larl	%r4,.Lrestart_part2	# Setup restart PSW at absolute 0
-	larl	%r3,.Lrestart_diag308_psw
+	larl	%r3,restart_diag308_psw
 	og	%r4,0(%r3)		# Save PSW
 	lghi	%r3,0
 	sturg	%r4,%r3			# Use sturg, because of large pages
@@ -121,39 +133,26 @@ ENTRY(_diag308_reset_amode31)
 	lhi	%r1,2			# Use mode 2 = ESAME (dump)
 	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE	# Switch to ESAME mode
 	sam64				# Switch to 64 bit addressing mode
-	larl	%r4,.Lctlregs		# Restore control registers
+	larl	%r4,ctlregs		# Restore control registers
 	lctlg	%c0,%c15,0(%r4)
-	larl	%r4,.Lfpctl		# Restore floating point ctl register
+	larl	%r4,fpctl		# Restore floating point ctl register
 	lfpc	0(%r4)
-	larl	%r4,.Lprefix		# Restore prefix register
+	larl	%r4,prefix		# Restore prefix register
 	spx	0(%r4)
-	larl	%r4,.Lcontinue_psw	# Restore PSW flags
+	larl	%r4,continue_psw	# Restore PSW flags
 	larl	%r2,.Lcontinue
 	stg	%r2,8(%r4)
 	lpswe	0(%r4)
 .Lcontinue:
 	BR_EX_AMODE31_r14
-ENDPROC(_diag308_reset_amode31)
+SYM_FUNC_END(_diag308_reset_amode31)
 
 	.section .amode31.data,"aw",@progbits
-.align	8
-.Lrestart_diag308_psw:
-	.long	0x00080000,0x80000000
-
-.align 8
-.Lcontinue_psw:
-	.quad	0,0
-
-.align 8
-.Lctlreg0:
-	.quad	0
-.Lctlregs:
-	.rept	16
-	.quad	0
-	.endr
-.Lfpctl:
-	.long	0
-.Lprefix:
-	.long	0
-.Lprefix_zero:
-	.long	0
+	.balign	8
+SYM_DATA_LOCAL(restart_diag308_psw,	.long 0x00080000,0x80000000)
+SYM_DATA_LOCAL(continue_psw,		.quad 0,0)
+SYM_DATA_LOCAL(ctlreg0,			.quad 0)
+SYM_DATA_LOCAL(ctlregs,			.fill 16,8,0)
+SYM_DATA_LOCAL(fpctl,			.long 0)
+SYM_DATA_LOCAL(prefix,			.long 0)
+SYM_DATA_LOCAL(prefix_zero,		.long 0)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..fed17d407a44 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -36,7 +36,6 @@
 #include <linux/profile.h>
 #include <linux/timex.h>
 #include <linux/notifier.h>
-#include <linux/timekeeper_internal.h>
 #include <linux/clockchips.h>
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
@@ -55,10 +54,10 @@
 #include <asm/cio.h>
 #include "entry.h"
 
-union tod_clock tod_clock_base __section(".data");
+union tod_clock __bootdata_preserved(tod_clock_base);
 EXPORT_SYMBOL_GPL(tod_clock_base);
 
-u64 clock_comparator_max = -1ULL;
+u64 __bootdata_preserved(clock_comparator_max);
 EXPORT_SYMBOL_GPL(clock_comparator_max);
 
 static DEFINE_PER_CPU(struct clock_event_device, comparators);
@@ -80,12 +79,10 @@ void __init time_early_init(void)
 {
 	struct ptff_qto qto;
 	struct ptff_qui qui;
-	int cs;
 
 	/* Initialize TOD steering parameters */
 	tod_steering_end = tod_clock_base.tod;
-	for (cs = 0; cs < CS_BASES; cs++)
-		vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+	vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end;
 
 	if (!test_facility(28))
 		return;
@@ -102,6 +99,11 @@ void __init time_early_init(void)
 			((long) qui.old_leap * 4096000000L);
 }
 
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+	return tod_to_ns(__get_tod_clock_monotonic());
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -126,7 +128,7 @@ void clock_comparator_work(void)
 {
 	struct clock_event_device *cd;
 
-	S390_lowcore.clock_comparator = clock_comparator_max;
+	get_lowcore()->clock_comparator = clock_comparator_max;
 	cd = this_cpu_ptr(&comparators);
 	cd->event_handler(cd);
 }
@@ -134,8 +136,8 @@ void clock_comparator_work(void)
 static int s390_next_event(unsigned long delta,
 			   struct clock_event_device *evt)
 {
-	S390_lowcore.clock_comparator = get_tod_clock() + delta;
-	set_clock_comparator(S390_lowcore.clock_comparator);
+	get_lowcore()->clock_comparator = get_tod_clock() + delta;
+	set_clock_comparator(get_lowcore()->clock_comparator);
 	return 0;
 }
 
@@ -148,8 +150,8 @@ void init_cpu_timer(void)
 	struct clock_event_device *cd;
 	int cpu;
 
-	S390_lowcore.clock_comparator = clock_comparator_max;
-	set_clock_comparator(S390_lowcore.clock_comparator);
+	get_lowcore()->clock_comparator = clock_comparator_max;
+	set_clock_comparator(get_lowcore()->clock_comparator);
 
 	cpu = smp_processor_id();
 	cd = &per_cpu(comparators, cpu);
@@ -168,10 +170,10 @@ void init_cpu_timer(void)
 	clockevents_register_device(cd);
 
 	/* Enable clock comparator timer interrupt. */
-	__ctl_set_bit(0,11);
+	local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SUBMASK_BIT);
 
 	/* Always allow the timing alert external interrupt. */
-	__ctl_set_bit(0, 4);
+	local_ctl_set_bit(0, CR0_ETR_SUBMASK_BIT);
 }
 
 static void clock_comparator_interrupt(struct ext_code ext_code,
@@ -179,8 +181,8 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
 				       unsigned long param64)
 {
 	inc_irq_stat(IRQEXT_CLK);
-	if (S390_lowcore.clock_comparator == clock_comparator_max)
-		set_clock_comparator(S390_lowcore.clock_comparator);
+	if (get_lowcore()->clock_comparator == clock_comparator_max)
+		set_clock_comparator(get_lowcore()->clock_comparator);
 }
 
 static void stp_timing_alert(struct stp_irq_parm *);
@@ -246,10 +248,11 @@ static struct clocksource clocksource_tod = {
 	.rating		= 400,
 	.read		= read_tod_clock,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 1000,
-	.shift		= 12,
+	.mult		= 4096000,
+	.shift		= 24,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
+	.id		= CSID_S390_TOD,
 };
 
 struct clocksource * __init clocksource_default_clock(void)
@@ -368,7 +371,6 @@ static void clock_sync_global(long delta)
 {
 	unsigned long now, adj;
 	struct ptff_qto qto;
-	int cs;
 
 	/* Fixup the monotonic sched clock. */
 	tod_clock_base.eitod += delta;
@@ -384,10 +386,8 @@ static void clock_sync_global(long delta)
 		panic("TOD clock sync offset %li is too large to drift\n",
 		      tod_steering_delta);
 	tod_steering_end = now + (abs(tod_steering_delta) << 15);
-	for (cs = 0; cs < CS_BASES; cs++) {
-		vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
-		vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta;
-	}
+	vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end;
+	vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta;
 
 	/* Update LPAR offset. */
 	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
@@ -403,12 +403,12 @@ static void clock_sync_global(long delta)
 static void clock_sync_local(long delta)
 {
 	/* Add the delta to the clock comparator. */
-	if (S390_lowcore.clock_comparator != clock_comparator_max) {
-		S390_lowcore.clock_comparator += delta;
-		set_clock_comparator(S390_lowcore.clock_comparator);
+	if (get_lowcore()->clock_comparator != clock_comparator_max) {
+		get_lowcore()->clock_comparator += delta;
+		set_clock_comparator(get_lowcore()->clock_comparator);
 	}
 	/* Adjust the last_update_clock time-stamp. */
-	S390_lowcore.last_update_clock += delta;
+	get_lowcore()->last_update_clock += delta;
 }
 
 /* Single threaded workqueue used for stp sync events */
@@ -463,6 +463,12 @@ static void __init stp_reset(void)
 	}
 }
 
+bool stp_enabled(void)
+{
+	return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online;
+}
+EXPORT_SYMBOL(stp_enabled);
+
 static void stp_timeout(struct timer_list *unused)
 {
 	queue_work(time_sync_wq, &stp_work);
@@ -651,12 +657,12 @@ static void stp_check_leap(void)
 		if (ret < 0)
 			pr_err("failed to set leap second flags\n");
 		/* arm Timer to clear leap second flags */
-		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+		mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400));
 	} else {
 		/* The day the leap second is scheduled for hasn't been reached. Retry
 		 * in one hour.
 		 */
-		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+		mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600));
 	}
 }
 
@@ -674,7 +680,7 @@ static void stp_work_fn(struct work_struct *work)
 
 	if (!stp_online) {
 		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
-		del_timer_sync(&stp_timer);
+		timer_delete_sync(&stp_timer);
 		goto out_unlock;
 	}
 
@@ -697,7 +703,7 @@ static void stp_work_fn(struct work_struct *work)
 
 	if (!check_sync_clock())
 		/*
-		 * There is a usable clock but the synchonization failed.
+		 * There is a usable clock but the synchronization failed.
 		 * Retry after a second.
 		 */
 		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
@@ -711,7 +717,7 @@ out_unlock:
 /*
  * STP subsys sysfs interface functions
  */
-static struct bus_type stp_subsys = {
+static const struct bus_type stp_subsys = {
 	.name		= "stp",
 	.dev_name	= "stp",
 };
@@ -724,8 +730,8 @@ static ssize_t ctn_id_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
-		ret = sprintf(buf, "%016lx\n",
-			      *(unsigned long *) stp_info.ctnid);
+		ret = sysfs_emit(buf, "%016lx\n",
+				 *(unsigned long *)stp_info.ctnid);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -740,7 +746,7 @@ static ssize_t ctn_type_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
-		ret = sprintf(buf, "%i\n", stp_info.ctn);
+		ret = sysfs_emit(buf, "%i\n", stp_info.ctn);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -755,7 +761,7 @@ static ssize_t dst_offset_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x2000))
-		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+		ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -770,7 +776,7 @@ static ssize_t leap_seconds_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x8000))
-		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+		ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -796,11 +802,11 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev,
 		return ret;
 
 	if (!stzi.lsoib.p)
-		return sprintf(buf, "0,0\n");
+		return sysfs_emit(buf, "0,0\n");
 
-	return sprintf(buf, "%lu,%d\n",
-		       tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
-		       stzi.lsoib.nlso - stzi.lsoib.also);
+	return sysfs_emit(buf, "%lu,%d\n",
+			  tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+			  stzi.lsoib.nlso - stzi.lsoib.also);
 }
 
 static DEVICE_ATTR_RO(leap_seconds_scheduled);
@@ -813,7 +819,7 @@ static ssize_t stratum_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
-		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+		ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -828,7 +834,7 @@ static ssize_t time_offset_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x0800))
-		ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+		ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -843,7 +849,7 @@ static ssize_t time_zone_offset_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x4000))
-		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+		ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.tzo);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -858,7 +864,7 @@ static ssize_t timing_mode_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
-		ret = sprintf(buf, "%i\n", stp_info.tmd);
+		ret = sysfs_emit(buf, "%i\n", stp_info.tmd);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -873,7 +879,7 @@ static ssize_t timing_state_show(struct device *dev,
 
 	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
-		ret = sprintf(buf, "%i\n", stp_info.tst);
+		ret = sysfs_emit(buf, "%i\n", stp_info.tst);
 	mutex_unlock(&stp_mutex);
 	return ret;
 }
@@ -884,7 +890,7 @@ static ssize_t online_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
-	return sprintf(buf, "%i\n", stp_online);
+	return sysfs_emit(buf, "%i\n", stp_online);
 }
 
 static ssize_t online_store(struct device *dev,
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index c6eecd4a5302..3df048e190b1 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -6,6 +6,7 @@
 #define KMSG_COMPONENT "cpu"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/cpufeature.h>
 #include <linux/workqueue.h>
 #include <linux/memblock.h>
 #include <linux/uaccess.h>
@@ -24,7 +25,9 @@
 #include <linux/mm.h>
 #include <linux/nodemask.h>
 #include <linux/node.h>
+#include <asm/hiperdispatch.h>
 #include <asm/sysinfo.h>
+#include <asm/asm.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -47,6 +50,7 @@ static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
 static void set_topology_timer(void);
 static void topology_work_fn(struct work_struct *work);
 static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
 
 static DECLARE_WORK(topology_work, topology_work_fn);
 
@@ -95,7 +99,7 @@ out:
 static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
 {
 	static cpumask_t mask;
-	int i;
+	unsigned int max_cpu;
 
 	cpumask_clear(&mask);
 	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
@@ -104,9 +108,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
 	if (topology_mode != TOPOLOGY_MODE_HW)
 		goto out;
 	cpu -= cpu % (smp_cpu_mtid + 1);
-	for (i = 0; i <= smp_cpu_mtid; i++) {
-		if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
-			cpumask_set_cpu(cpu + i, &mask);
+	max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+	for (; cpu <= max_cpu; cpu++) {
+		if (cpumask_test_cpu(cpu, &cpu_setup_mask))
+			cpumask_set_cpu(cpu, &mask);
 	}
 out:
 	cpumask_copy(dst, &mask);
@@ -123,25 +128,27 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
 	unsigned int core;
 
 	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
-		unsigned int rcore;
-		int lcpu, i;
+		unsigned int max_cpu, rcore;
+		int cpu;
 
 		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
-		lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
-		if (lcpu < 0)
+		cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+		if (cpu < 0)
 			continue;
-		for (i = 0; i <= smp_cpu_mtid; i++) {
-			topo = &cpu_topology[lcpu + i];
+		max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+		for (; cpu <= max_cpu; cpu++) {
+			topo = &cpu_topology[cpu];
 			topo->drawer_id = drawer->id;
 			topo->book_id = book->id;
 			topo->socket_id = socket->id;
 			topo->core_id = rcore;
-			topo->thread_id = lcpu + i;
+			topo->thread_id = cpu;
 			topo->dedicated = tl_core->d;
-			cpumask_set_cpu(lcpu + i, &drawer->mask);
-			cpumask_set_cpu(lcpu + i, &book->mask);
-			cpumask_set_cpu(lcpu + i, &socket->mask);
-			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
+			cpumask_set_cpu(cpu, &drawer->mask);
+			cpumask_set_cpu(cpu, &book->mask);
+			cpumask_set_cpu(cpu, &socket->mask);
+			smp_cpu_set_polarization(cpu, tl_core->pp);
+			smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
 		}
 	}
 }
@@ -219,22 +226,22 @@ static void topology_update_polarization_simple(void)
 
 static int ptf(unsigned long fc)
 {
-	int rc;
+	int cc;
 
 	asm volatile(
-		"	.insn	rre,0xb9a20000,%1,%1\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (rc)
-		: "d" (fc)  : "cc");
-	return rc;
+		"	.insn	rre,0xb9a20000,%[fc],%[fc]\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
+		: [fc] "d" (fc)
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc);
 }
 
 int topology_set_cpu_management(int fc)
 {
 	int cpu, rc;
 
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (!cpu_has_topology())
 		return -EOPNOTSUPP;
 	if (fc)
 		rc = ptf(PTF_VERTICAL);
@@ -268,6 +275,7 @@ void update_cpu_masks(void)
 			topo->drawer_id = id;
 		}
 	}
+	hd_reset_state();
 	for_each_online_cpu(cpu) {
 		topo = &cpu_topology[cpu];
 		pkg_first = cpumask_first(&topo->core_mask);
@@ -276,8 +284,10 @@ void update_cpu_masks(void)
 			for_each_cpu(sibling, &topo->core_mask) {
 				topo_sibling = &cpu_topology[sibling];
 				smt_first = cpumask_first(&topo_sibling->thread_mask);
-				if (sibling == smt_first)
+				if (sibling == smt_first) {
 					topo_package->booted_cores++;
+					hd_add_core(sibling);
+				}
 			}
 		} else {
 			topo->booted_cores = topo_package->booted_cores;
@@ -301,33 +311,33 @@ static void __arch_update_dedicated_flag(void *arg)
 static int __arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
-	int rc = 0;
+	int rc, hd_status;
 
+	hd_status = 0;
+	rc = 0;
 	mutex_lock(&smp_cpu_state_mutex);
-	if (MACHINE_HAS_TOPOLOGY) {
+	if (cpu_has_topology()) {
 		rc = 1;
 		store_topology(info);
 		tl_to_masks(info);
 	}
 	update_cpu_masks();
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (!cpu_has_topology())
 		topology_update_polarization_simple();
+	if (cpu_management == 1)
+		hd_status = hd_enable_hiperdispatch();
 	mutex_unlock(&smp_cpu_state_mutex);
+	if (hd_status == 0)
+		hd_disable_hiperdispatch();
 	return rc;
 }
 
 int arch_update_cpu_topology(void)
 {
-	struct device *dev;
-	int cpu, rc;
+	int rc;
 
 	rc = __arch_update_cpu_topology();
 	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
-	for_each_online_cpu(cpu) {
-		dev = get_cpu_device(cpu);
-		if (dev)
-			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
-	}
 	return rc;
 }
 
@@ -362,12 +372,12 @@ static void set_topology_timer(void)
 	if (atomic_add_unless(&topology_poll, -1, 0))
 		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
 	else
-		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
+		mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
 }
 
 void topology_expect_change(void)
 {
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (!cpu_has_topology())
 		return;
 	/* This is racy, but it doesn't matter since it is just a heuristic.
 	 * Worst case is that we poll in a higher frequency for a bit longer.
@@ -378,7 +388,24 @@ void topology_expect_change(void)
 	set_topology_timer();
 }
 
-static int cpu_management;
+static int set_polarization(int polarization)
+{
+	int rc = 0;
+
+	cpus_read_lock();
+	mutex_lock(&smp_cpu_state_mutex);
+	if (cpu_management == polarization)
+		goto out;
+	rc = topology_set_cpu_management(polarization);
+	if (rc)
+		goto out;
+	cpu_management = polarization;
+	topology_expect_change();
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+	cpus_read_unlock();
+	return rc;
+}
 
 static ssize_t dispatching_show(struct device *dev,
 				struct device_attribute *attr,
@@ -387,7 +414,7 @@ static ssize_t dispatching_show(struct device *dev,
 	ssize_t count;
 
 	mutex_lock(&smp_cpu_state_mutex);
-	count = sprintf(buf, "%d\n", cpu_management);
+	count = sysfs_emit(buf, "%d\n", cpu_management);
 	mutex_unlock(&smp_cpu_state_mutex);
 	return count;
 }
@@ -404,19 +431,7 @@ static ssize_t dispatching_store(struct device *dev,
 		return -EINVAL;
 	if (val != 0 && val != 1)
 		return -EINVAL;
-	rc = 0;
-	cpus_read_lock();
-	mutex_lock(&smp_cpu_state_mutex);
-	if (cpu_management == val)
-		goto out;
-	rc = topology_set_cpu_management(val);
-	if (rc)
-		goto out;
-	cpu_management = val;
-	topology_expect_change();
-out:
-	mutex_unlock(&smp_cpu_state_mutex);
-	cpus_read_unlock();
+	rc = set_polarization(val);
 	return rc ? rc : count;
 }
 static DEVICE_ATTR_RW(dispatching);
@@ -430,19 +445,19 @@ static ssize_t cpu_polarization_show(struct device *dev,
 	mutex_lock(&smp_cpu_state_mutex);
 	switch (smp_cpu_get_polarization(cpu)) {
 	case POLARIZATION_HRZ:
-		count = sprintf(buf, "horizontal\n");
+		count = sysfs_emit(buf, "horizontal\n");
 		break;
 	case POLARIZATION_VL:
-		count = sprintf(buf, "vertical:low\n");
+		count = sysfs_emit(buf, "vertical:low\n");
 		break;
 	case POLARIZATION_VM:
-		count = sprintf(buf, "vertical:medium\n");
+		count = sysfs_emit(buf, "vertical:medium\n");
 		break;
 	case POLARIZATION_VH:
-		count = sprintf(buf, "vertical:high\n");
+		count = sysfs_emit(buf, "vertical:high\n");
 		break;
 	default:
-		count = sprintf(buf, "unknown\n");
+		count = sysfs_emit(buf, "unknown\n");
 		break;
 	}
 	mutex_unlock(&smp_cpu_state_mutex);
@@ -466,7 +481,7 @@ static ssize_t cpu_dedicated_show(struct device *dev,
 	ssize_t count;
 
 	mutex_lock(&smp_cpu_state_mutex);
-	count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
+	count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
 	mutex_unlock(&smp_cpu_state_mutex);
 	return count;
 }
@@ -486,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu)
 	int rc;
 
 	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
-	if (rc || !MACHINE_HAS_TOPOLOGY)
+	if (rc || !cpu_has_topology())
 		return rc;
 	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
 	if (rc)
@@ -520,7 +535,7 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
 	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
-	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ cpu_cpu_mask, SD_INIT_NAME(PKG) },
 	{ NULL, },
 };
 
@@ -534,33 +549,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
 		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
 	nr_masks = max(nr_masks, 1);
 	for (i = 0; i < nr_masks; i++) {
-		mask->next = memblock_alloc(sizeof(*mask->next), 8);
-		if (!mask->next)
-			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-			      __func__, sizeof(*mask->next), 8);
+		mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
 		mask = mask->next;
 	}
 }
 
+static int __init detect_polarization(union topology_entry *tle)
+{
+	struct topology_core *tl_core;
+
+	while (tle->nl)
+		tle = next_tle(tle);
+	tl_core = (struct topology_core *)tle;
+	return tl_core->pp != POLARIZATION_HRZ;
+}
+
 void __init topology_init_early(void)
 {
 	struct sysinfo_15_1_x *info;
 
 	set_sched_topology(s390_topology);
 	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
-		if (MACHINE_HAS_TOPOLOGY)
+		if (cpu_has_topology())
 			topology_mode = TOPOLOGY_MODE_HW;
 		else
 			topology_mode = TOPOLOGY_MODE_SINGLE;
 	}
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (!cpu_has_topology())
 		goto out;
-	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-	if (!tl_info)
-		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-		      __func__, PAGE_SIZE, PAGE_SIZE);
+	tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
 	info = tl_info;
 	store_topology(info);
+	cpu_management = detect_polarization(info->tle);
 	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
 		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
 		info->mag[4], info->mag[5], info->mnest);
@@ -577,7 +597,7 @@ static inline int topology_get_mode(int enabled)
 {
 	if (!enabled)
 		return TOPOLOGY_MODE_SINGLE;
-	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+	return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
 }
 
 static inline int topology_is_enabled(void)
@@ -598,7 +618,7 @@ static int __init topology_setup(char *str)
 }
 early_param("topology", topology_setup);
 
-static int topology_ctl_handler(struct ctl_table *ctl, int write,
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
 				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int enabled = topology_is_enabled();
@@ -628,33 +648,58 @@ static int topology_ctl_handler(struct ctl_table *ctl, int write,
 	return rc;
 }
 
-static struct ctl_table topology_ctl_table[] = {
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+				    void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int polarization;
+	int rc;
+	struct ctl_table ctl_entry = {
+		.procname	= ctl->procname,
+		.data		= &polarization,
+		.maxlen		= sizeof(int),
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+
+	polarization = cpu_management;
+	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+	if (rc < 0 || !write)
+		return rc;
+	return set_polarization(polarization);
+}
+
+static const struct ctl_table topology_ctl_table[] = {
 	{
 		.procname	= "topology",
 		.mode		= 0644,
 		.proc_handler	= topology_ctl_handler,
 	},
-	{ },
-};
-
-static struct ctl_table topology_dir_table[] = {
 	{
-		.procname	= "s390",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= topology_ctl_table,
+		.procname	= "polarization",
+		.mode		= 0644,
+		.proc_handler	= polarization_ctl_handler,
 	},
-	{ },
 };
 
 static int __init topology_init(void)
 {
+	struct device *dev_root;
+	int rc = 0;
+
 	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
-	if (MACHINE_HAS_TOPOLOGY)
+	if (cpu_has_topology())
 		set_topology_timer();
 	else
 		topology_update_polarization_simple();
-	register_sysctl_table(topology_dir_table);
-	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
+	if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+		set_polarization(1);
+	register_sysctl("s390", topology_ctl_table);
+
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		rc = device_create_file(dev_root, &dev_attr_dispatching);
+		put_device(dev_root);
+	}
+	return rc;
 }
 device_initcall(topology_init);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1d2aa448d103..19687dab32f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -3,18 +3,13 @@
  *  S390 version
  *    Copyright IBM Corp. 1999, 2000
  *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
  *
  *  Derived from "arch/i386/kernel/traps.c"
  *    Copyright (C) 1991, 1992 Linus Torvalds
  */
 
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
- */
-#include "asm/irqflags.h"
-#include "asm/ptrace.h"
+#include <linux/cpufeature.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/randomize_kstack.h>
@@ -27,9 +22,13 @@
 #include <linux/uaccess.h>
 #include <linux/cpu.h>
 #include <linux/entry-common.h>
+#include <linux/kmsan.h>
 #include <asm/asm-extable.h>
-#include <asm/fpu/api.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
 #include <asm/vtime.h>
+#include <asm/fpu.h>
+#include <asm/fault.h>
 #include "entry.h"
 
 static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -40,29 +39,30 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
 		address = current->thread.trap_tdb.data[3];
 	else
 		address = regs->psw.addr;
-	return (void __user *) (address - (regs->int_code >> 16));
+	return (void __user *)(address - (regs->int_code >> 16));
 }
 
+#ifdef CONFIG_GENERIC_BUG
 int is_valid_bugaddr(unsigned long addr)
 {
 	return 1;
 }
+#endif
 
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
 {
 	if (user_mode(regs)) {
 		force_sig_fault(si_signo, si_code, get_trap_ip(regs));
 		report_user_fault(regs, si_signo, 0);
-        } else {
+	} else {
 		if (!fixup_exception(regs))
 			die(regs, str);
-        }
+	}
 }
 
 static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
 {
-	if (notify_die(DIE_TRAP, str, regs, 0,
-		       regs->int_code, si_signo) == NOTIFY_STOP)
+	if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP)
 		return;
 	do_report_trap(regs, si_signo, si_code, str);
 }
@@ -74,8 +74,7 @@ void do_per_trap(struct pt_regs *regs)
 		return;
 	if (!current->ptrace)
 		return;
-	force_sig_fault(SIGTRAP, TRAP_HWBKPT,
-		(void __force __user *) current->thread.per_event.address);
+	force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address);
 }
 NOKPROBE_SYMBOL(do_per_trap);
 
@@ -94,36 +93,25 @@ static void name(struct pt_regs *regs)		\
 	do_trap(regs, signr, sicode, str);	\
 }
 
-DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
-	      "addressing exception")
-DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
-	      "execute exception")
-DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
-	      "fixpoint divide exception")
-DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
-	      "fixpoint overflow exception")
-DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
-	      "HFP overflow exception")
-DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
-	      "HFP underflow exception")
-DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
-	      "HFP significance exception")
-DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
-	      "HFP divide exception")
-DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
-	      "HFP square root exception")
-DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
-	      "operand exception")
-DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
-	      "privileged operation")
-DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
-	      "special operation exception")
-DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
-	      "transaction constraint exception")
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception")
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception")
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception")
 
 static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
 {
 	int si_code = 0;
+
 	/* FPC[2] is Data Exception Code */
 	if ((fpc & 0x00000300) == 0) {
 		/* bits 6 and 7 of DXC are 0 iff IEEE exception */
@@ -149,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs)
 
 static void illegal_op(struct pt_regs *regs)
 {
-        __u8 opcode[6];
-	__u16 __user *location;
 	int is_uprobe_insn = 0;
+	u16 __user *location;
 	int signal = 0;
+	u16 opcode;
 
 	location = get_trap_ip(regs);
-
 	if (user_mode(regs)) {
-		if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+		if (get_user(opcode, location))
 			return;
-		if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+		if (opcode == S390_BREAKPOINT_U16) {
 			if (current->ptrace)
 				force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
 			else
 				signal = SIGILL;
 #ifdef CONFIG_UPROBES
-		} else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+		} else if (opcode == UPROBE_SWBP_INSN) {
 			is_uprobe_insn = 1;
 #endif
-		} else
+		} else {
 			signal = SIGILL;
+		}
 	}
 	/*
-	 * We got either an illegal op in kernel mode, or user space trapped
+	 * This is either an illegal op in kernel mode, or user space trapped
 	 * on a uprobes illegal instruction. See if kprobes or uprobes picks
 	 * it up. If not, SIGILL.
 	 */
 	if (is_uprobe_insn || !user_mode(regs)) {
-		if (notify_die(DIE_BPT, "bpt", regs, 0,
-			       3, SIGTRAP) != NOTIFY_STOP)
+		if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP)
 			signal = SIGILL;
 	}
 	if (signal)
@@ -186,21 +173,13 @@ static void illegal_op(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(illegal_op);
 
-DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
-	      "specification exception");
-
 static void vector_exception(struct pt_regs *regs)
 {
 	int si_code, vic;
 
-	if (!MACHINE_HAS_VX) {
-		do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
-		return;
-	}
-
 	/* get vector interrupt code from fpc */
-	save_fpu_regs();
-	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
+	save_user_fpu_regs();
+	vic = (current->thread.ufpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
 		si_code = FPE_FLTINV;
@@ -225,9 +204,9 @@ static void vector_exception(struct pt_regs *regs)
 
 static void data_exception(struct pt_regs *regs)
 {
-	save_fpu_regs();
-	if (current->thread.fpu.fpc & FPC_DXC_MASK)
-		do_fp_trap(regs, current->thread.fpu.fpc);
+	save_user_fpu_regs();
+	if (current->thread.ufpu.fpc & FPC_DXC_MASK)
+		do_fp_trap(regs, current->thread.ufpu.fpc);
 	else
 		do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
 }
@@ -245,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return;
-
 	switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
 	case BUG_TRAP_TYPE_NONE:
 		fixup_exception(regs);
@@ -258,15 +236,20 @@ static void monitor_event_exception(struct pt_regs *regs)
 	}
 }
 
-void kernel_stack_overflow(struct pt_regs *regs)
+void kernel_stack_invalid(struct pt_regs *regs)
 {
+	/*
+	 * Normally regs are unpoisoned by the generic entry code, but
+	 * kernel_stack_invalid() is a rare case that is called bypassing it.
+	 */
+	kmsan_unpoison_entry_regs(regs);
 	bust_spinlocks(1);
-	printk("Kernel stack overflow.\n");
+	pr_emerg("Kernel stack pointer invalid\n");
 	show_regs(regs);
 	bust_spinlocks(0);
-	panic("Corrupt kernel stack, can't continue.");
+	panic("Invalid kernel stack pointer, cannot continue");
 }
-NOKPROBE_SYMBOL(kernel_stack_overflow);
+NOKPROBE_SYMBOL(kernel_stack_invalid);
 
 static void __init test_monitor_call(void)
 {
@@ -274,18 +257,30 @@ static void __init test_monitor_call(void)
 
 	if (!IS_ENABLED(CONFIG_BUG))
 		return;
-	asm volatile(
+	asm_inline volatile(
 		"	mc	0,0\n"
-		"0:	xgr	%0,%0\n"
+		"0:	lhi	%[val],0\n"
 		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (val));
+		EX_TABLE(0b, 1b)
+		: [val] "+d" (val));
 	if (!val)
 		panic("Monitor call doesn't work!\n");
 }
 
 void __init trap_init(void)
 {
+	struct lowcore *lc = get_lowcore();
+	unsigned long flags;
+	struct ctlreg cr0;
+
+	local_irq_save(flags);
+	cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
+	psw_bits(lc->external_new_psw).mcheck = 1;
+	psw_bits(lc->program_new_psw).mcheck = 1;
+	psw_bits(lc->svc_new_psw).mcheck = 1;
+	psw_bits(lc->io_new_psw).mcheck = 1;
+	local_ctl_load(0, &cr0);
+	local_irq_restore(flags);
 	local_mcck_enable();
 	test_monitor_call();
 }
@@ -294,36 +289,47 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
 
 void noinstr __do_pgm_check(struct pt_regs *regs)
 {
-	unsigned int trapnr;
+	struct lowcore *lc = get_lowcore();
 	irqentry_state_t state;
+	unsigned int trapnr;
+	union teid teid;
 
-	regs->int_code = S390_lowcore.pgm_int_code;
-	regs->int_parm_long = S390_lowcore.trans_exc_code;
-
+	teid.val = lc->trans_exc_code;
+	regs->int_code = lc->pgm_int_code;
+	regs->int_parm_long = teid.val;
+	/*
+	 * In case of a guest fault, short-circuit the fault handler and return.
+	 * This way the sie64a() function will return 0; fault address and
+	 * other relevant bits are saved in current->thread.gmap_teid, and
+	 * the fault number in current->thread.gmap_int_code. KVM will be
+	 * able to use this information to handle the fault.
+	 */
+	if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) {
+		current->thread.gmap_teid.val = regs->int_parm_long;
+		current->thread.gmap_int_code = regs->int_code & 0xffff;
+		return;
+	}
 	state = irqentry_enter(regs);
-
 	if (user_mode(regs)) {
 		update_timer_sys();
-		if (!static_branch_likely(&cpu_has_bear)) {
+		if (!cpu_has_bear()) {
 			if (regs->last_break < 4096)
 				regs->last_break = 1;
 		}
 		current->thread.last_break = regs->last_break;
 	}
-
-	if (S390_lowcore.pgm_code & 0x0200) {
+	if (lc->pgm_code & 0x0200) {
 		/* transaction abort */
-		current->thread.trap_tdb = S390_lowcore.pgm_tdb;
+		current->thread.trap_tdb = lc->pgm_tdb;
 	}
-
-	if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+	if (lc->pgm_code & PGM_INT_CODE_PER) {
 		if (user_mode(regs)) {
 			struct per_event *ev = &current->thread.per_event;
 
 			set_thread_flag(TIF_PER_TRAP);
-			ev->address = S390_lowcore.per_address;
-			ev->cause = S390_lowcore.per_code_combined;
-			ev->paid = S390_lowcore.per_access_id;
+			ev->address = lc->per_address;
+			ev->cause = lc->per_code_combined;
+			ev->paid = lc->per_access_id;
 		} else {
 			/* PER event in kernel is kprobes */
 			__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
@@ -331,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
 			goto out;
 		}
 	}
-
 	if (!irqs_disabled_flags(regs->psw.mask))
 		trace_hardirqs_on();
 	__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
-
 	trapnr = regs->int_code & PGM_INT_CODE_MASK;
 	if (trapnr)
 		pgm_check_table[trapnr](regs);
@@ -387,8 +391,8 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
 	[0x3b]		= do_dat_exception,
 	[0x3c]		= default_trap_handler,
 	[0x3d]		= do_secure_storage_access,
-	[0x3e]		= do_non_secure_storage_access,
-	[0x3f]		= do_secure_storage_violation,
+	[0x3e]		= default_trap_handler,
+	[0x3f]		= default_trap_handler,
 	[0x40]		= monitor_event_exception,
 	[0x41 ... 0x7f] = default_trap_handler,
 };
@@ -399,5 +403,3 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
 	__stringify(default_trap_handler))
 
 COND_TRAP(do_secure_storage_access);
-COND_TRAP(do_non_secure_storage_access);
-COND_TRAP(do_secure_storage_violation);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 0ece156fdd7c..cd44be2b6ce8 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -49,6 +49,8 @@ static inline bool is_final_pt_regs(struct unwind_state *state,
 	       READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
 }
 
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
 bool unwind_next_frame(struct unwind_state *state)
 {
 	struct stack_info *info = &state->stack_info;
@@ -118,6 +120,8 @@ out_stop:
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		    struct pt_regs *regs, unsigned long first_frame)
 {
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index b88345ef8bd9..5b0633ea8d93 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -12,7 +12,6 @@
 #include <linux/kdebug.h>
 #include <linux/sched/task_stack.h>
 
-#include <asm/switch_to.h>
 #include <asm/facility.h>
 #include <asm/kprobes.h>
 #include <asm/dis.h>
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index a5425075dd25..b99478e84da4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -2,7 +2,7 @@
 /*
  * Common Ultravisor functions and initialization
  *
- * Copyright IBM Corp. 2019, 2020
+ * Copyright IBM Corp. 2019, 2024
  */
 #define KMSG_COMPONENT "prot_virt"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
@@ -14,21 +14,29 @@
 #include <linux/memblock.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
 #include <asm/facility.h>
 #include <asm/sections.h>
 #include <asm/uv.h>
 
 /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 int __bootdata_preserved(prot_virt_guest);
-#endif
+EXPORT_SYMBOL(prot_virt_guest);
 
+/*
+ * uv_info contains both host and guest information but it's currently only
+ * expected to be used within modules if it's the KVM module or for
+ * any PV guest module.
+ *
+ * The kernel itself will write these values once in uv_query_info()
+ * and then make some of them readable via a sysfs interface.
+ */
 struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
 
-#if IS_ENABLED(CONFIG_KVM)
 int __bootdata_preserved(prot_virt_host);
 EXPORT_SYMBOL(prot_virt_host);
-EXPORT_SYMBOL(uv_info);
 
 static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
 {
@@ -80,7 +88,7 @@ fail:
  * Requests the Ultravisor to pin the page in the shared state. This will
  * cause an intercept when the guest attempts to unshare the pinned page.
  */
-static int uv_pin_shared(unsigned long paddr)
+int uv_pin_shared(unsigned long paddr)
 {
 	struct uv_cb_cfs uvcb = {
 		.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@@ -92,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
 		return -EINVAL;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(uv_pin_shared);
 
 /*
  * Requests the Ultravisor to destroy a guest page and make it
@@ -100,7 +109,7 @@ static int uv_pin_shared(unsigned long paddr)
  *
  * @paddr: Absolute host address of page to be destroyed
  */
-static int uv_destroy_page(unsigned long paddr)
+static int uv_destroy(unsigned long paddr)
 {
 	struct uv_cb_cfs uvcb = {
 		.header.cmd = UVC_CMD_DESTR_SEC_STOR,
@@ -121,20 +130,33 @@ static int uv_destroy_page(unsigned long paddr)
 }
 
 /*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio
  */
-int uv_destroy_owned_page(unsigned long paddr)
+int uv_destroy_folio(struct folio *folio)
 {
-	struct page *page = phys_to_page(paddr);
 	int rc;
 
-	get_page(page);
-	rc = uv_destroy_page(paddr);
+	/* Large folios cannot be secure */
+	if (unlikely(folio_test_large(folio)))
+		return 0;
+
+	folio_get(folio);
+	rc = uv_destroy(folio_to_phys(folio));
 	if (!rc)
-		clear_bit(PG_arch_1, &page->flags);
-	put_page(page);
+		clear_bit(PG_arch_1, &folio->flags);
+	folio_put(folio);
 	return rc;
 }
+EXPORT_SYMBOL(uv_destroy_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+	VM_WARN_ON(!pte_present(pte));
+	return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
 
 /*
  * Requests the Ultravisor to encrypt a guest page and make it
@@ -154,65 +176,119 @@ int uv_convert_from_secure(unsigned long paddr)
 		return -EINVAL;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
 
 /*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio.
  */
-int uv_convert_owned_from_secure(unsigned long paddr)
+int uv_convert_from_secure_folio(struct folio *folio)
 {
-	struct page *page = phys_to_page(paddr);
 	int rc;
 
-	get_page(page);
-	rc = uv_convert_from_secure(paddr);
+	/* Large folios cannot be secure */
+	if (unlikely(folio_test_large(folio)))
+		return 0;
+
+	folio_get(folio);
+	rc = uv_convert_from_secure(folio_to_phys(folio));
 	if (!rc)
-		clear_bit(PG_arch_1, &page->flags);
-	put_page(page);
+		clear_bit(PG_arch_1, &folio->flags);
+	folio_put(folio);
 	return rc;
 }
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+	VM_WARN_ON(!pte_present(pte));
+	return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+	/*
+	 * The misc feature indicates, among other things, that importing a
+	 * shared page from a different protected VM will automatically also
+	 * transfer its ownership.
+	 */
+	if (uv_has_feature(BIT_UV_FEAT_MISC))
+		return false;
+	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+		return false;
+	return atomic_read(&mm->context.protected_count) > 1;
+}
 
 /*
- * Calculate the expected ref_count for a page that would otherwise have no
+ * Calculate the expected ref_count for a folio that would otherwise have no
  * further pins. This was cribbed from similar functions in other places in
  * the kernel, but with some slight modifications. We know that a secure
- * page can not be a huge page for example.
+ * folio can not be a large folio, for example.
  */
-static int expected_page_refs(struct page *page)
+static int expected_folio_refs(struct folio *folio)
 {
 	int res;
 
-	res = page_mapcount(page);
-	if (PageSwapCache(page)) {
+	res = folio_mapcount(folio);
+	if (folio_test_swapcache(folio)) {
 		res++;
-	} else if (page_mapping(page)) {
+	} else if (folio_mapping(folio)) {
 		res++;
-		if (page_has_private(page))
+		if (folio->private)
 			res++;
 	}
 	return res;
 }
 
-static int make_secure_pte(pte_t *ptep, unsigned long addr,
-			   struct page *exp_page, struct uv_cb_header *uvcb)
+/**
+ * __make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ *         -EBUSY if the folio is in writeback or has too many references;
+ *         -EAGAIN if the UVC needs to be attempted again;
+ *         -ENXIO if the address is not mapped;
+ *         -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ *          (it's the same logic as split_folio()), and the folio must be
+ *          locked.
+ */
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
 {
-	pte_t entry = READ_ONCE(*ptep);
-	struct page *page;
 	int expected, cc = 0;
 
-	if (!pte_present(entry))
-		return -ENXIO;
-	if (pte_val(entry) & _PAGE_INVALID)
-		return -ENXIO;
-
-	page = pte_page(entry);
-	if (page != exp_page)
-		return -ENXIO;
-	if (PageWriteback(page))
-		return -EAGAIN;
-	expected = expected_page_refs(page);
-	if (!page_ref_freeze(page, expected))
+	if (folio_test_writeback(folio))
 		return -EBUSY;
-	set_bit(PG_arch_1, &page->flags);
+	expected = expected_folio_refs(folio) + 1;
+	if (!folio_ref_freeze(folio, expected))
+		return -EBUSY;
+	set_bit(PG_arch_1, &folio->flags);
 	/*
 	 * If the UVC does not succeed or fail immediately, we don't want to
 	 * loop for long, or we might get stall notifications.
@@ -222,9 +298,9 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
 	 * -EAGAIN and we let the callers deal with it.
 	 */
 	cc = __uv_call(0, (u64)uvcb);
-	page_ref_unfreeze(page, expected);
+	folio_ref_unfreeze(folio, expected);
 	/*
-	 * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+	 * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
 	 * If busy or partially completed, return -EAGAIN.
 	 */
 	if (cc == UVC_CC_OK)
@@ -234,164 +310,255 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
 	return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
 }
 
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
 {
-	struct vm_area_struct *vma;
-	bool local_drain = false;
-	spinlock_t *ptelock;
-	unsigned long uaddr;
-	struct page *page;
-	pte_t *ptep;
 	int rc;
 
-again:
-	rc = -EFAULT;
-	mmap_read_lock(gmap->mm);
+	if (!folio_trylock(folio))
+		return -EAGAIN;
+	if (should_export_before_import(uvcb, mm))
+		uv_convert_from_secure(folio_to_phys(folio));
+	rc = __make_folio_secure(folio, uvcb);
+	folio_unlock(folio);
+
+	return rc;
+}
+
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ *			       split the folio if it is large.
+ * @mm:    the mm containing the folio to work on
+ * @folio: the folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ *          the mm lock should not be held.
+ * Return: 0 if the operation was successful;
+ *	   -EAGAIN if splitting the large folio was not successful,
+ *		   but another attempt can be made;
+ *	   -EINVAL in case of other folio splitting errors. See split_folio().
+ */
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
+{
+	int rc, tried_splits;
+
+	lockdep_assert_not_held(&mm->mmap_lock);
+	folio_wait_writeback(folio);
+	lru_add_drain_all();
+
+	if (!folio_test_large(folio))
+		return 0;
+
+	for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+		struct address_space *mapping;
+		loff_t lstart, lend;
+		struct inode *inode;
+
+		folio_lock(folio);
+		rc = split_folio(folio);
+		if (rc != -EBUSY) {
+			folio_unlock(folio);
+			return rc;
+		}
 
-	uaddr = __gmap_translate(gmap, gaddr);
-	if (IS_ERR_VALUE(uaddr))
-		goto out;
-	vma = vma_lookup(gmap->mm, uaddr);
-	if (!vma)
-		goto out;
-	/*
-	 * Secure pages cannot be huge and userspace should not combine both.
-	 * In case userspace does it anyway this will result in an -EFAULT for
-	 * the unpack. The guest is thus never reaching secure mode. If
-	 * userspace is playing dirty tricky with mapping huge pages later
-	 * on this will result in a segmentation fault.
-	 */
-	if (is_vm_hugetlb_page(vma))
-		goto out;
-
-	rc = -ENXIO;
-	page = follow_page(vma, uaddr, FOLL_WRITE);
-	if (IS_ERR_OR_NULL(page))
-		goto out;
-
-	lock_page(page);
-	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
-	rc = make_secure_pte(ptep, uaddr, page, uvcb);
-	pte_unmap_unlock(ptep, ptelock);
-	unlock_page(page);
-out:
-	mmap_read_unlock(gmap->mm);
-
-	if (rc == -EAGAIN) {
-		/*
-		 * If we are here because the UVC returned busy or partial
-		 * completion, this is just a useless check, but it is safe.
-		 */
-		wait_on_page_writeback(page);
-	} else if (rc == -EBUSY) {
 		/*
-		 * If we have tried a local drain and the page refcount
-		 * still does not match our expected safe value, try with a
-		 * system wide drain. This is needed if the pagevecs holding
-		 * the page are on a different CPU.
+		 * Splitting can fail with -EBUSY for various reasons, but we
+		 * have to handle one case explicitly for now: some mappings
+		 * don't allow for splitting dirty folios; writeback will
+		 * mark them clean again, including marking all page table
+		 * entries mapping the folio read-only, to catch future write
+		 * attempts.
+		 *
+		 * While the system should be writing back dirty folios in the
+		 * background, we obtained this folio by looking up a writable
+		 * page table entry. On these problematic mappings, writable
+		 * page table entries imply dirty folios, preventing the
+		 * split in the first place.
+		 *
+		 * To prevent a livelock when triggering writeback manually and
+		 * letting the caller look up the folio again in the page
+		 * table (turning it dirty), immediately try to split again.
+		 *
+		 * This is only a problem for some mappings (e.g., XFS);
+		 * mappings that do not support writeback (e.g., shmem) do not
+		 * apply.
 		 */
-		if (local_drain) {
-			lru_add_drain_all();
-			/* We give up here, and let the caller try again */
-			return -EAGAIN;
+		if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+		    !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+			folio_unlock(folio);
+			break;
 		}
+
 		/*
-		 * We are here if the page refcount does not match the
-		 * expected safe value. The main culprits are usually
-		 * pagevecs. With lru_add_drain() we drain the pagevecs
-		 * on the local CPU so that hopefully the refcount will
-		 * reach the expected safe value.
+		 * Ideally, we'd only trigger writeback on this exact folio. But
+		 * there is no easy way to do that, so we'll stabilize the
+		 * mapping while we still hold the folio lock, so we can drop
+		 * the folio lock to trigger writeback on the range currently
+		 * covered by the folio instead.
 		 */
-		lru_add_drain();
-		local_drain = true;
-		/* And now we try again immediately after draining */
-		goto again;
-	} else if (rc == -ENXIO) {
-		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
-			return -EFAULT;
-		return -EAGAIN;
+		mapping = folio->mapping;
+		lstart = folio_pos(folio);
+		lend = lstart + folio_size(folio) - 1;
+		inode = igrab(mapping->host);
+		folio_unlock(folio);
+
+		if (unlikely(!inode))
+			break;
+
+		filemap_write_and_wait_range(mapping, lstart, lend);
+		iput(mapping->host);
 	}
-	return rc;
+	return -EAGAIN;
 }
-EXPORT_SYMBOL_GPL(gmap_make_secure);
 
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
 {
-	struct uv_cb_cts uvcb = {
-		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
-		.header.len = sizeof(uvcb),
-		.guest_handle = gmap->guest_handle,
-		.gaddr = gaddr,
-	};
+	struct vm_area_struct *vma;
+	struct folio_walk fw;
+	struct folio *folio;
+	int rc;
+
+	mmap_read_lock(mm);
+	vma = vma_lookup(mm, hva);
+	if (!vma) {
+		mmap_read_unlock(mm);
+		return -EFAULT;
+	}
+	folio = folio_walk_start(&fw, vma, hva, 0);
+	if (!folio) {
+		mmap_read_unlock(mm);
+		return -ENXIO;
+	}
+
+	folio_get(folio);
+	/*
+	 * Secure pages cannot be huge and userspace should not combine both.
+	 * In case userspace does it anyway this will result in an -EFAULT for
+	 * the unpack. The guest is thus never reaching secure mode.
+	 * If userspace plays dirty tricks and decides to map huge pages at a
+	 * later point in time, it will receive a segmentation fault or
+	 * KVM_RUN will return -EFAULT.
+	 */
+	if (folio_test_hugetlb(folio))
+		rc = -EFAULT;
+	else if (folio_test_large(folio))
+		rc = -E2BIG;
+	else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+		rc = -ENXIO;
+	else
+		rc = make_folio_secure(mm, folio, uvcb);
+	folio_walk_end(&fw, vma);
+	mmap_read_unlock(mm);
+
+	if (rc == -E2BIG || rc == -EBUSY) {
+		rc = s390_wiggle_split_folio(mm, folio);
+		if (!rc)
+			rc = -EAGAIN;
+	}
+	folio_put(folio);
 
-	return gmap_make_secure(gmap, gaddr, &uvcb);
+	return rc;
 }
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+EXPORT_SYMBOL_GPL(make_hva_secure);
 
 /*
- * To be called with the page locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the page concurrently. Having 2
- * parallel make_page_accessible is fine, as the UV calls will become a
- * no-op if the page is already exported.
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
  */
-int arch_make_page_accessible(struct page *page)
+int arch_make_folio_accessible(struct folio *folio)
 {
 	int rc = 0;
 
-	/* Hugepage cannot be protected, so nothing to do */
-	if (PageHuge(page))
+	/* Large folios cannot be secure */
+	if (unlikely(folio_test_large(folio)))
 		return 0;
 
 	/*
-	 * PG_arch_1 is used in 3 places:
-	 * 1. for kernel page tables during early boot
-	 * 2. for storage keys of huge pages and KVM
-	 * 3. As an indication that this page might be secure. This can
+	 * PG_arch_1 is used in 2 places:
+	 * 1. for storage keys of hugetlb folios and KVM
+	 * 2. As an indication that this small folio might be secure. This can
 	 *    overindicate, e.g. we set the bit before calling
 	 *    convert_to_secure.
-	 * As secure pages are never huge, all 3 variants can co-exists.
+	 * As secure pages are never large folios, both variants can co-exist.
 	 */
-	if (!test_bit(PG_arch_1, &page->flags))
+	if (!test_bit(PG_arch_1, &folio->flags))
 		return 0;
 
-	rc = uv_pin_shared(page_to_phys(page));
+	rc = uv_pin_shared(folio_to_phys(folio));
 	if (!rc) {
-		clear_bit(PG_arch_1, &page->flags);
+		clear_bit(PG_arch_1, &folio->flags);
 		return 0;
 	}
 
-	rc = uv_convert_from_secure(page_to_phys(page));
+	rc = uv_convert_from_secure(folio_to_phys(folio));
 	if (!rc) {
-		clear_bit(PG_arch_1, &page->flags);
+		clear_bit(PG_arch_1, &folio->flags);
 		return 0;
 	}
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
-
-#endif
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
 
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
 static ssize_t uv_query_facilities(struct kobject *kobj,
-				   struct kobj_attribute *attr, char *page)
+				   struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
-			uv_info.inst_calls_list[0],
-			uv_info.inst_calls_list[1],
-			uv_info.inst_calls_list[2],
-			uv_info.inst_calls_list[3]);
+	return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n",
+			  uv_info.inst_calls_list[0],
+			  uv_info.inst_calls_list[1],
+			  uv_info.inst_calls_list[2],
+			  uv_info.inst_calls_list[3]);
 }
 
 static struct kobj_attribute uv_query_facilities_attr =
 	__ATTR(facilities, 0444, uv_query_facilities, NULL);
 
+static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
+	__ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
+	__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
+
+static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len);
+}
+
+static struct kobj_attribute uv_query_dump_cpu_len_attr =
+	__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
+
+static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
+					       struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len);
+}
+
+static struct kobj_attribute uv_query_dump_storage_state_len_attr =
+	__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
+
+static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len);
+}
+
+static struct kobj_attribute uv_query_dump_finalize_len_attr =
+	__ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
+
 static ssize_t uv_query_feature_indications(struct kobject *kobj,
 					    struct kobj_attribute *attr, char *buf)
 {
@@ -402,69 +569,208 @@ static struct kobj_attribute uv_query_feature_indications_attr =
 	__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
 
 static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
+				       struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%d\n",
-			uv_info.max_guest_cpu_id + 1);
+	return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1);
 }
 
 static struct kobj_attribute uv_query_max_guest_cpus_attr =
 	__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
 
 static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
-				      struct kobj_attribute *attr, char *page)
+				      struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%d\n",
-			uv_info.max_num_sec_conf);
+	return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf);
 }
 
 static struct kobj_attribute uv_query_max_guest_vms_attr =
 	__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
 
 static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
+				       struct kobj_attribute *attr, char *buf)
 {
-	return scnprintf(page, PAGE_SIZE, "%lx\n",
-			uv_info.max_sec_stor_addr);
+	return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr);
 }
 
 static struct kobj_attribute uv_query_max_guest_addr_attr =
 	__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
 
+static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
+					     struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
+	__ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags);
+}
+
+static struct kobj_attribute uv_query_supp_att_pflags_attr =
+	__ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+
+static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj,
+						struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr =
+	__ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL);
+
+static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj,
+					    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_pcf_attr =
+	__ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL);
+
+static ssize_t uv_query_supp_secret_types(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types);
+}
+
+static struct kobj_attribute uv_query_supp_secret_types_attr =
+	__ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL);
+
+static ssize_t uv_query_max_secrets(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n",
+			  uv_info.max_assoc_secrets + uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_secrets_attr =
+	__ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+
+static ssize_t uv_query_max_retr_secrets(struct kobject *kobj,
+					 struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_retr_secrets_attr =
+	__ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL);
+
+static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj,
+					  struct kobj_attribute *attr,
+					  char *buf)
+{
+	return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets);
+}
+
+static struct kobj_attribute uv_query_max_assoc_secrets_attr =
+	__ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL);
+
 static struct attribute *uv_query_attrs[] = {
 	&uv_query_facilities_attr.attr,
 	&uv_query_feature_indications_attr.attr,
 	&uv_query_max_guest_cpus_attr.attr,
 	&uv_query_max_guest_vms_attr.attr,
 	&uv_query_max_guest_addr_attr.attr,
+	&uv_query_supp_se_hdr_ver_attr.attr,
+	&uv_query_supp_se_hdr_pcf_attr.attr,
+	&uv_query_dump_storage_state_len_attr.attr,
+	&uv_query_dump_finalize_len_attr.attr,
+	&uv_query_dump_cpu_len_attr.attr,
+	&uv_query_supp_att_req_hdr_ver_attr.attr,
+	&uv_query_supp_att_pflags_attr.attr,
+	&uv_query_supp_add_secret_req_ver_attr.attr,
+	&uv_query_supp_add_secret_pcf_attr.attr,
+	&uv_query_supp_secret_types_attr.attr,
+	&uv_query_max_secrets_attr.attr,
+	&uv_query_max_assoc_secrets_attr.attr,
+	&uv_query_max_retr_secrets_attr.attr,
 	NULL,
 };
 
+static inline struct uv_cb_query_keys uv_query_keys(void)
+{
+	struct uv_cb_query_keys uvcb = {
+		.header.cmd = UVC_CMD_QUERY_KEYS,
+		.header.len = sizeof(uvcb)
+	};
+
+	uv_call(0, (uint64_t)&uvcb);
+	return uvcb;
+}
+
+static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at)
+{
+	return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n",
+			    hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]);
+}
+
+static ssize_t uv_keys_host_key(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct uv_cb_query_keys uvcb = uv_query_keys();
+
+	return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_host_key_attr =
+	__ATTR(host_key, 0444, uv_keys_host_key, NULL);
+
+static ssize_t uv_keys_backup_host_key(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	struct uv_cb_query_keys uvcb = uv_query_keys();
+
+	return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_backup_host_key_attr =
+	__ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL);
+
+static ssize_t uv_keys_all(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct uv_cb_query_keys uvcb = uv_query_keys();
+	ssize_t len = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++)
+		len += emit_hash(uvcb.key_hashes + i, buf, len);
+
+	return len;
+}
+
+static struct kobj_attribute uv_keys_all_attr =
+	__ATTR(all, 0444, uv_keys_all, NULL);
+
 static struct attribute_group uv_query_attr_group = {
 	.attrs = uv_query_attrs,
 };
 
+static struct attribute *uv_keys_attrs[] = {
+	&uv_keys_host_key_attr.attr,
+	&uv_keys_backup_host_key_attr.attr,
+	&uv_keys_all_attr.attr,
+	NULL,
+};
+
+static struct attribute_group uv_keys_attr_group = {
+	.attrs = uv_keys_attrs,
+};
+
 static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *page)
+				     struct kobj_attribute *attr, char *buf)
 {
-	int val = 0;
-
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
-	val = prot_virt_guest;
-#endif
-	return scnprintf(page, PAGE_SIZE, "%d\n", val);
+	return sysfs_emit(buf, "%d\n", prot_virt_guest);
 }
 
 static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
-				    struct kobj_attribute *attr, char *page)
+				    struct kobj_attribute *attr, char *buf)
 {
-	int val = 0;
-
-#if IS_ENABLED(CONFIG_KVM)
-	val = prot_virt_host;
-#endif
-
-	return scnprintf(page, PAGE_SIZE, "%d\n", val);
+	return sysfs_emit(buf, "%d\n", prot_virt_host);
 }
 
 static struct kobj_attribute uv_prot_virt_guest =
@@ -480,9 +786,27 @@ static const struct attribute *uv_prot_virt_attrs[] = {
 };
 
 static struct kset *uv_query_kset;
+static struct kset *uv_keys_kset;
 static struct kobject *uv_kobj;
 
-static int __init uv_info_init(void)
+static int __init uv_sysfs_dir_init(const struct attribute_group *grp,
+				    struct kset **uv_dir_kset, const char *name)
+{
+	struct kset *kset;
+	int rc;
+
+	kset = kset_create_and_add(name, NULL, uv_kobj);
+	if (!kset)
+		return -ENOMEM;
+	*uv_dir_kset = kset;
+
+	rc = sysfs_create_group(&kset->kobj, grp);
+	if (rc)
+		kset_unregister(kset);
+	return rc;
+}
+
+static int __init uv_sysfs_init(void)
 {
 	int rc = -ENOMEM;
 
@@ -497,17 +821,16 @@ static int __init uv_info_init(void)
 	if (rc)
 		goto out_kobj;
 
-	uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
-	if (!uv_query_kset) {
-		rc = -ENOMEM;
+	rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query");
+	if (rc)
 		goto out_ind_files;
-	}
 
-	rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
-	if (!rc)
-		return 0;
+	/* Get installed key hashes if available, ignore any errors */
+	if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list))
+		uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys");
+
+	return 0;
 
-	kset_unregister(uv_query_kset);
 out_ind_files:
 	sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
 out_kobj:
@@ -515,5 +838,110 @@ out_kobj:
 	kobject_put(uv_kobj);
 	return rc;
 }
-device_initcall(uv_info_init);
-#endif
+device_initcall(uv_sysfs_init);
+
+/*
+ * Locate a secret in the list by its id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Search for a secret with the given secret_id in the Ultravisor secret store.
+ *
+ * Context: might sleep.
+ */
+static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
+			       const struct uv_secret_list *list,
+			       struct uv_secret_list_item_hdr *secret)
+{
+	u16 i;
+
+	for (i = 0; i < list->total_num_secrets; i++) {
+		if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) {
+			*secret = list->secrets[i].hdr;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+/*
+ * Do the actual search for `uv_get_secret_metadata`.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Walks the secret list page by page while the UV reports more data and the
+ * list index advances. Returns 0 on a match, -ENOENT if no page holds the id,
+ * -ENODEV if the UV rejects the command as invalid, -EIO on other UV errors.
+ * Context: might sleep.
+ */
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+		   struct uv_secret_list *list,
+		   struct uv_secret_list_item_hdr *secret)
+{
+	u16 start_idx = 0;
+	u16 next_idx;
+	u16 list_rc;
+
+	for (;;) {
+		uv_list_secrets(list, start_idx, &list_rc, NULL);
+		if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA)
+			return list_rc == UVC_RC_INV_CMD ? -ENODEV : -EIO;
+		if (find_secret_in_page(secret_id, list, secret) == 0)
+			return 0;
+		next_idx = list->next_secret_idx;
+		/* Compare against the old index: stop if the UV made no progress. */
+		if (list_rc != UVC_RC_MORE_DATA || next_idx <= start_idx)
+			return -ENOENT;
+		start_idx = next_idx;
+	}
+}
+EXPORT_SYMBOL_GPL(uv_find_secret);
+
+/**
+ * uv_retrieve_secret() - get the secret value for the secret index.
+ * @secret_idx: Secret index for which the secret should be retrieved.
+ * @buf: Buffer to store retrieved secret.
+ * @buf_size: Size of the buffer. The correct buffer size is reported as part of
+ * the result from `uv_get_secret_metadata`.
+ *
+ * Calls the Retrieve Secret UVC and translates the UV return code into an errno.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0		- Entry found; buffer contains a valid secret.
+ * * %-ENOENT	- No entry found or secret at the index is non-retrievable.
+ * * %-ENODEV	- Not supported: UV not available or command not available.
+ * * %-EINVAL	- Buffer too small for content.
+ * * %-EIO	- Other unexpected UV error.
+ */
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size)
+{
+	struct uv_cb_retr_secr uvcb = {
+		.header.len = sizeof(uvcb),
+		.header.cmd = UVC_CMD_RETR_SECRET,
+		.secret_idx = secret_idx,
+		.buf_addr = (u64)buf,
+		.buf_size = buf_size,
+	};
+
+	uv_call_sched(0, (u64)&uvcb);
+
+	switch (uvcb.header.rc) {
+	case UVC_RC_EXECUTED:
+		return 0;
+	case UVC_RC_INV_CMD:
+		return -ENODEV;
+	case UVC_RC_RETR_SECR_STORE_EMPTY:
+	case UVC_RC_RETR_SECR_INV_SECRET:
+	case UVC_RC_RETR_SECR_INV_IDX:
+		return -ENOENT;
+	case UVC_RC_RETR_SECR_BUF_SMALL:
+		return -EINVAL;
+	default:
+		return -EIO;
+	}
+}
+EXPORT_SYMBOL_GPL(uv_retrieve_secret);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 5075cde77b29..430feb1a5013 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -12,126 +12,20 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
-#include <linux/time_namespace.h>
 #include <linux/random.h>
+#include <linux/vdso_datastore.h>
 #include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
 #include <asm/vdso.h>
 
 extern char vdso64_start[], vdso64_end[];
 extern char vdso32_start[], vdso32_end[];
 
-static struct vm_special_mapping vvar_mapping;
-
-static union {
-	struct vdso_data	data[CS_BASES];
-	u8			page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-
-struct vdso_data *vdso_data = vdso_data_store.data;
-
-enum vvar_pages {
-	VVAR_DATA_PAGE_OFFSET,
-	VVAR_TIMENS_PAGE_OFFSET,
-	VVAR_NR_PAGES,
-};
-
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
-	return (struct vdso_data *)(vvar_page);
-}
-
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	if (likely(vma->vm_mm == current->mm))
-		return current->nsproxy->time_ns->vvar_page;
-	/*
-	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
-	 * through interfaces like /proc/$pid/mem or
-	 * process_vm_{readv,writev}() as long as there's no .access()
-	 * in special_mapping_vmops().
-	 * For more details check_vma_flags() and __access_remote_vm()
-	 */
-	WARN(1, "vvar_page accessed remotely");
-	return NULL;
-}
-
-/*
- * The VVAR page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will be re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
-	struct mm_struct *mm = task->mm;
-	struct vm_area_struct *vma;
-
-	mmap_read_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		unsigned long size = vma->vm_end - vma->vm_start;
-
-		if (!vma_is_special_mapping(vma, &vvar_mapping))
-			continue;
-		zap_page_range(vma, vma->vm_start, size);
-		break;
-	}
-	mmap_read_unlock(mm);
-	return 0;
-}
-#else
-static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
-	return NULL;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
-			     struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *timens_page = find_timens_vvar_page(vma);
-	unsigned long addr, pfn;
-	vm_fault_t err;
-
-	switch (vmf->pgoff) {
-	case VVAR_DATA_PAGE_OFFSET:
-		pfn = virt_to_pfn(vdso_data);
-		if (timens_page) {
-			/*
-			 * Fault in VVAR page too, since it will be accessed
-			 * to get clock data anyway.
-			 */
-			addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
-			err = vmf_insert_pfn(vma, addr, pfn);
-			if (unlikely(err & VM_FAULT_ERROR))
-				return err;
-			pfn = page_to_pfn(timens_page);
-		}
-		break;
-#ifdef CONFIG_TIME_NS
-	case VVAR_TIMENS_PAGE_OFFSET:
-		/*
-		 * If a task belongs to a time namespace then a namespace
-		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
-		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
-		 * offset.
-		 * See also the comment near timens_setup_vdso_data().
-		 */
-		if (!timens_page)
-			return VM_FAULT_SIGBUS;
-		pfn = virt_to_pfn(vdso_data);
-		break;
-#endif /* CONFIG_TIME_NS */
-	default:
-		return VM_FAULT_SIGBUS;
-	}
-	return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
 static int vdso_mremap(const struct vm_special_mapping *sm,
 		       struct vm_area_struct *vma)
 {
@@ -139,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static struct vm_special_mapping vvar_mapping = {
-	.name = "[vvar]",
-	.fault = vvar_fault,
-};
-
 static struct vm_special_mapping vdso64_mapping = {
 	.name = "[vdso]",
 	.mremap = vdso_mremap,
@@ -169,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
 	struct vm_area_struct *vma;
 	int rc;
 
-	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+	BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
@@ -184,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
 	rc = vvar_start;
 	if (IS_ERR_VALUE(vvar_start))
 		goto out;
-	vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
-				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
-				       VM_PFNMAP,
-				       &vvar_mapping);
+	vma = vdso_install_vvar_mapping(mm, vvar_start);
 	rc = PTR_ERR(vma);
 	if (IS_ERR(vma))
 		goto out;
-	vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
+	vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE;
 	/* VM_MAYWRITE for COW so gdb can set breakpoints */
 	vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
-				       VM_READ|VM_EXEC|
+				       VM_READ|VM_EXEC|VM_SEALED_SYSMAP|
 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 				       vdso_mapping);
 	if (IS_ERR(vma)) {
@@ -226,7 +112,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
 	end -= len;
 
 	if (end > start) {
-		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+		offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
 		addr = start + (offset << PAGE_SHIFT);
 	} else {
 		addr = start;
@@ -234,17 +120,22 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
 	return addr;
 }
 
-unsigned long vdso_size(void)
+unsigned long vdso_text_size(void)
 {
-	unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+	unsigned long size;
 
 	if (is_compat_task())
-		size += vdso32_end - vdso32_start;
+		size = vdso32_end - vdso32_start;
 	else
-		size += vdso64_end - vdso64_start;
+		size = vdso64_end - vdso64_start;
 	return PAGE_ALIGN(size);
 }
 
+unsigned long vdso_size(void)
+{
+	return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE;
+}
+
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	unsigned long addr = VDSO_BASE;
@@ -269,8 +160,25 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
 	return pagelist;
 }
 
+static void vdso_apply_alternatives(void)
+{
+	const struct elf64_shdr *alt, *shdr;
+	struct alt_instr *start, *end;
+	const struct elf64_hdr *hdr;
+
+	hdr = (struct elf64_hdr *)vdso64_start;	/* 64-bit vDSO image is read as an ELF file */
+	shdr = (void *)hdr + hdr->e_shoff;	/* section header table inside that image */
+	alt = find_section(hdr, shdr, ".altinstructions");
+	if (!alt)
+		return;	/* no .altinstructions section, nothing to apply */
+	start = (void *)hdr + alt->sh_offset;	/* start of the section contents */
+	end = (void *)hdr + alt->sh_offset + alt->sh_size;	/* end of the section contents */
+	apply_alternatives(start, end);
+}
+
 static int __init vdso_init(void)
 {
+	vdso_apply_alternatives();
 	vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
 	if (IS_ENABLED(CONFIG_COMPAT))
 		vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 245bddfe9bc0..1e4ddd1a683f 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -1,11 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # List of files in the vdso
 
-KCOV_INSTRUMENT := n
-ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
-ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
-
-include $(srctree)/lib/vdso/Makefile
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
 obj-vdso32 = vdso_user_wrapper-32.o note-32.o
 
 # Build rules
@@ -20,9 +17,12 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 KBUILD_AFLAGS_32 += -m31 -s
 
 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables
 
-LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
+LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
 	--hash-style=both --build-id=sha1 -melf_s390 -T
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
@@ -32,17 +32,14 @@ obj-y += vdso32_wrapper.o
 targets += vdso32.lds
 CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
 # Force dependency (incbin is bad)
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
 
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
-	$(call if_changed,ld)
+quiet_cmd_vdso_and_check = VDSO    $@
+      cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE
+	$(call if_changed,vdso_and_check)
 
 # strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
@@ -58,18 +55,8 @@ quiet_cmd_vdso32as = VDSO32A $@
 quiet_cmd_vdso32cc = VDSO32C $@
       cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
 
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso32.so
-
 # Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
 quiet_cmd_vdsosym = VDSOSYM $@
 	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
 
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index edf5ff1debe1..9630d58c2080 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -6,18 +6,16 @@
 
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <vdso/datapage.h>
 
 OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
 OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
 
 SECTIONS
 {
-	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
-	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
-	. = VDSO_LBASE + SIZEOF_HEADERS;
+	VDSO_VVAR_SYMS
+
+	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
 	.gnu.hash	: { *(.gnu.hash) }
diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
index 3f42f27f978c..2e645003fdaf 100644
--- a/arch/s390/kernel/vdso32/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
@@ -1,12 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#include <linux/linkage.h>
 #include <asm/unistd.h>
 #include <asm/dwarf.h>
 
 .macro vdso_syscall func,syscall
 	.globl __kernel_compat_\func
 	.type  __kernel_compat_\func,@function
-	.align 8
+	__ALIGN
 __kernel_compat_\func:
 	CFI_STARTPROC
 	svc	\syscall
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 9e2b95a222a9..d8f0df742809 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,17 +1,19 @@
 # SPDX-License-Identifier: GPL-2.0
 # List of files in the vdso
 
-KCOV_INSTRUMENT := n
-ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
-ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
-
-include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o
-obj-cvdso64 = vdso64_generic.o getcpu.o
-VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
+obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
 CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
 CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
 
+ifneq ($(c-getrandom-y),)
+	CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
 # Build rules
 
 targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
@@ -22,11 +24,15 @@ KBUILD_AFLAGS += -DBUILD_VDSO
 KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
 
 KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_64 += -m64 -s
+KBUILD_AFLAGS_64 += -m64
 
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
+KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
+ldflags-y := -shared -soname=linux-vdso64.so.1 \
 	     --hash-style=both --build-id=sha1 -T
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
@@ -36,18 +42,15 @@ obj-y += vdso64_wrapper.o
 targets += vdso64.lds
 CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
 # Force dependency (incbin is bad)
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
+quiet_cmd_vdso_and_check = VDSO    $@
+      cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
-	$(call if_changed,ld)
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
+	$(call if_changed,vdso_and_check)
 
 # strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
@@ -67,18 +70,8 @@ quiet_cmd_vdso64as = VDSO64A $@
 quiet_cmd_vdso64cc = VDSO64C $@
       cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
 
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso64.so
-
 # Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
 quiet_cmd_vdsosym = VDSOSYM $@
 	cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
 
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h
index 34c7a2312f9d..9e5397e7b590 100644
--- a/arch/s390/kernel/vdso64/vdso.h
+++ b/arch/s390/kernel/vdso64/vdso.h
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
 int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
 int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
 int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
 
 #endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 4461ea151e49..e4f6551ae898 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -4,20 +4,19 @@
  * library
  */
 
+#include <asm/vdso/vsyscall.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <vdso/datapage.h>
 
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
 OUTPUT_ARCH(s390:64-bit)
-ENTRY(_start)
 
 SECTIONS
 {
-	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
-	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
-	. = VDSO_LBASE + SIZEOF_HEADERS;
+	VDSO_VVAR_SYMS
+
+	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
 	.gnu.hash	: { *(.gnu.hash) }
@@ -43,6 +42,10 @@ SECTIONS
 	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
 	.rodata1	: { *(.rodata1) }
 
+	. = ALIGN(8);
+	.altinstructions	: { *(.altinstructions) }
+	.altinstr_replacement	: { *(.altinstr_replacement) }
+
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
 	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
@@ -141,6 +144,7 @@ VERSION
 		__kernel_restart_syscall;
 		__kernel_rt_sigreturn;
 		__kernel_sigreturn;
+		__kernel_getrandom;
 	local: *;
 	};
 }
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 97f0c0a669a5..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -1,12 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
 #include <asm/vdso.h>
 #include <asm/unistd.h>
 #include <asm/asm-offsets.h>
 #include <asm/dwarf.h>
 #include <asm/ptrace.h>
 
-#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
-
 /*
  * Older glibc version called vdso without allocating a stackframe. This wrapper
  * is just used to allocate a stackframe. See
@@ -14,23 +13,23 @@
  * for details.
  */
 .macro vdso_func func
-	.globl __kernel_\func
-	.type  __kernel_\func,@function
-	.align 8
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
 	CFI_STARTPROC
-	aghi	%r15,-WRAPPER_FRAME_SIZE
-	CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	stg	%r14,STACK_FRAME_OVERHEAD(%r15)
+	aghi	%r15,-STACK_FRAME_VDSO_OVERHEAD
+	CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
+	CFI_VAL_OFFSET 15,-STACK_FRAME_USER_OVERHEAD
+	stg	%r14,__SFVDSO_RETURN_ADDRESS(%r15)
+	CFI_REL_OFFSET 14,__SFVDSO_RETURN_ADDRESS
+	xc	__SFUSER_BACKCHAIN(8,%r15),__SFUSER_BACKCHAIN(%r15)
 	brasl	%r14,__s390_vdso_\func
-	lg	%r14,STACK_FRAME_OVERHEAD(%r15)
-	aghi	%r15,WRAPPER_FRAME_SIZE
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+	lg	%r14,__SFVDSO_RETURN_ADDRESS(%r15)
+	CFI_RESTORE 14
+	aghi	%r15,STACK_FRAME_VDSO_OVERHEAD
+	CFI_DEF_CFA_OFFSET STACK_FRAME_USER_OVERHEAD
 	CFI_RESTORE 15
 	br	%r14
 	CFI_ENDPROC
-	.size	__kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
 .endm
 
 vdso_func gettimeofday
@@ -39,16 +38,13 @@ vdso_func clock_gettime
 vdso_func getcpu
 
 .macro vdso_syscall func,syscall
-	.globl __kernel_\func
-	.type  __kernel_\func,@function
-	.align 8
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
 	CFI_STARTPROC
 	svc	\syscall
 	/* Make sure we notice when a syscall returns, which shouldn't happen */
 	.word	0
 	CFI_ENDPROC
-	.size	__kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
 .endm
 
 vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0	%v0
+#define STATE1	%v1
+#define STATE2	%v2
+#define STATE3	%v3
+#define COPY0	%v4
+#define COPY1	%v5
+#define COPY2	%v6
+#define COPY3	%v7
+#define BEPERM	%v19
+#define TMP0	%v20
+#define TMP1	%v21
+#define TMP2	%v22
+#define TMP3	%v23
+
+	.section .rodata
+
+	.balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+	.text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and an 8-byte
+ * counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ *				       const uint8_t *key,
+ *				       uint32_t *counter,
+ *				       size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+	CFI_STARTPROC
+	larl	%r1,chacha20_constants
+
+	/* COPY0 = "expand 32-byte k" */
+	VL	COPY0,0,,%r1
+
+	/* BEPERM = byte selectors for VPERM */
+	ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+	/* COPY1,COPY2 = key */
+	VLM	COPY1,COPY2,0,%r3
+
+	/* COPY3 = counter || zero nonce  */
+	lg	%r3,0(%r4)
+	VZERO	COPY3
+	VLVGG	COPY3,%r3,0
+
+	lghi	%r1,0
+.Lblock:
+	VLR	STATE0,COPY0
+	VLR	STATE1,COPY1
+	VLR	STATE2,COPY2
+	VLR	STATE3,COPY3
+
+	lghi	%r0,10
+.Ldoubleround:
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,16
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,12
+
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,8
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,7
+
+	/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+	VSLDB	STATE1,STATE1,STATE1,4
+	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+	VSLDB	STATE2,STATE2,STATE2,8
+	/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+	VSLDB	STATE3,STATE3,STATE3,12
+
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,16
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,12
+
+	/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+	VAF	STATE0,STATE0,STATE1
+	VX	STATE3,STATE3,STATE0
+	VERLLF	STATE3,STATE3,8
+
+	/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+	VAF	STATE2,STATE2,STATE3
+	VX	STATE1,STATE1,STATE2
+	VERLLF	STATE1,STATE1,7
+
+	/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+	VSLDB	STATE1,STATE1,STATE1,12
+	/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+	VSLDB	STATE2,STATE2,STATE2,8
+	/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+	VSLDB	STATE3,STATE3,STATE3,4
+	brctg	%r0,.Ldoubleround
+
+	/* OUTPUT0 = STATE0 + COPY0 */
+	VAF	STATE0,STATE0,COPY0
+	/* OUTPUT1 = STATE1 + COPY1 */
+	VAF	STATE1,STATE1,COPY1
+	/* OUTPUT2 = STATE2 + COPY2 */
+	VAF	STATE2,STATE2,COPY2
+	/* OUTPUT3 = STATE3 + COPY3 */
+	VAF	STATE3,STATE3,COPY3
+
+	ALTERNATIVE							\
+		__stringify(						\
+		/* Convert STATE to little endian and store to OUTPUT */\
+		VPERM	TMP0,STATE0,STATE0,BEPERM;			\
+		VPERM	TMP1,STATE1,STATE1,BEPERM;			\
+		VPERM	TMP2,STATE2,STATE2,BEPERM;			\
+		VPERM	TMP3,STATE3,STATE3,BEPERM;			\
+		VSTM	TMP0,TMP3,0,%r2),				\
+		__stringify(						\
+		/* 32 bit wise little endian store to OUTPUT */		\
+		VSTBRF	STATE0,0,,%r2;					\
+		VSTBRF	STATE1,16,,%r2;					\
+		VSTBRF	STATE2,32,,%r2;					\
+		VSTBRF	STATE3,48,,%r2;					\
+		brcl	0,0),						\
+		ALT_FACILITY(148)
+
+	/* ++COPY3.COUNTER */
+	/* alsih %r3,1 */
+	.insn	rilu,0xcc0a00000000,%r3,1
+	alcr	%r3,%r1
+	VLVGG	COPY3,%r3,0
+
+	/* OUTPUT += 64, --NBLOCKS */
+	aghi	%r2,64
+	brctg	%r5,.Lblock
+
+	/* COUNTER = COPY3.COUNTER */
+	stg	%r3,0(%r4)
+
+	/* Zero out potentially sensitive regs */
+	VZERO	STATE0
+	VZERO	STATE1
+	VZERO	STATE2
+	VZERO	STATE3
+	VZERO	COPY1
+	VZERO	COPY2
+
+	/* Early exit if TMP0-TMP3 have not been used */
+	ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+	VZERO	TMP0
+	VZERO	TMP1
+	VZERO	TMP2
+	VZERO	TMP3
+
+	br	%r14
+	CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso64/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+	if (test_facility(129))	/* NOTE(review): presumably the vector facility used by the ChaCha code - confirm */
+		return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+	if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+		return -ENOSYS;	/* this exact argument pattern is refused without facility 129 */
+	return getrandom_syscall(buffer, len, flags);	/* opaque state is not used on this path */
+}
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
new file mode 100644
index 000000000000..cc8933e04ff7
--- /dev/null
+++ b/arch/s390/kernel/vmcore_info.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	struct lowcore *abs_lc;
+
+	VMCOREINFO_SYMBOL(lowcore_ptr);
+	VMCOREINFO_SYMBOL(high_memory);
+	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+	vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+	vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+	vmcoreinfo_append_str("IDENTITYBASE=%lx\n", __identity_base);
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+	vmcoreinfo_append_str("KERNELOFFPHYS=%lx\n", __kaslr_offset_phys);
+	abs_lc = get_abs_lowcore();
+	abs_lc->vmcore_info = paddr_vmcoreinfo_note();	/* note address is kept in the lowcore as well */
+	put_abs_lowcore(abs_lc);	/* pairs with get_abs_lowcore() above */
+}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 2e526f11b91e..ff1ddba96352 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -14,9 +14,13 @@
 #define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \
 			   *(.bss..invalid_pg_dir)
 
+#define RO_EXCEPTION_TABLE_ALIGN	16
+
 /* Handle ro_after_init data on our own. */
 #define RO_AFTER_INIT_DATA
 
+#define RUNTIME_DISCARD_EXIT
+
 #define EMITS_PT_NOTE
 
 #include <asm-generic/vmlinux.lds.h>
@@ -35,14 +39,13 @@ PHDRS {
 
 SECTIONS
 {
-	. = 0x100000;
+	. = TEXT_OFFSET;
 	.text : {
 		_stext = .;		/* Start of text section */
 		_text = .;		/* Text and read-only data */
 		HEAD_TEXT
 		TEXT_TEXT
 		SCHED_TEXT
-		CPUIDLE_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
 		IRQENTRY_TEXT
@@ -65,11 +68,22 @@ SECTIONS
 		 *(.data..ro_after_init)
 		JUMP_TABLE_DATA
 	} :data
-	EXCEPTION_TABLE(16)
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;
 
+	.data.rel.ro : {
+		*(.data.rel.ro .data.rel.ro.*)
+	}
+	.got : {
+		__got_start = .;
+		*(.got)
+		__got_end = .;
+	}
+
 	RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
+	.data.rel : {
+		*(.data.rel*)
+	}
 	BOOT_DATA_PRESERVED
 
 	. = ALIGN(8);
@@ -79,6 +93,7 @@ SECTIONS
 		_end_amode31_refs = .;
 	}
 
+	. = ALIGN(PAGE_SIZE);
 	_edata = .;		/* End of data section */
 
 	/* will be freed after init */
@@ -131,6 +146,7 @@ SECTIONS
 	/*
 	 * Table with the patch locations to undo expolines
 	*/
+	. = ALIGN(4);
 	.nospec_call_table : {
 		__nospec_call_start = . ;
 		*(.s390_indirect*)
@@ -167,31 +183,23 @@ SECTIONS
 	.amode31.data : {
 		*(.amode31.data)
 	}
-	. = ALIGN(PAGE_SIZE);
+	. = _samode31 + AMODE31_SIZE;
 	_eamode31 = .;
 
 	/* early.c uses stsi, which requires page aligned data. */
 	. = ALIGN(PAGE_SIZE);
 	INIT_DATA_SECTION(0x100)
 
-	PERCPU_SECTION(0x100)
+	RUNTIME_CONST_VARIABLES
 
-	.dynsym ALIGN(8) : {
-		__dynsym_start = .;
-		*(.dynsym)
-		__dynsym_end = .;
-	}
-	.rela.dyn ALIGN(8) : {
-		__rela_dyn_start = .;
-		*(.rela*)
-		__rela_dyn_end = .;
-	}
+	PERCPU_SECTION(0x100)
 
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;		/* freed after init ends here */
 
 	BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
 
+	. = ALIGN(PAGE_SIZE);
 	_end = . ;
 
 	/*
@@ -199,7 +207,6 @@ SECTIONS
 	 * it should match struct vmlinux_info
 	 */
 	.vmlinux.info 0 (INFO) : {
-		QUAD(_stext)					/* default_lma */
 		QUAD(startup_continue)				/* entry */
 		QUAD(__bss_start - _stext)			/* image_size */
 		QUAD(__bss_stop - __bss_start)			/* bss_size */
@@ -208,10 +215,21 @@ SECTIONS
 		QUAD(__boot_data_preserved_start)		/* bootdata_preserved_off */
 		QUAD(__boot_data_preserved_end -
 		     __boot_data_preserved_start)		/* bootdata_preserved_size */
-		QUAD(__dynsym_start)				/* dynsym_start */
-		QUAD(__rela_dyn_start)				/* rela_dyn_start */
-		QUAD(__rela_dyn_end)				/* rela_dyn_end */
+		QUAD(__got_start)				/* got_start */
+		QUAD(__got_end)					/* got_end */
 		QUAD(_eamode31 - _samode31)			/* amode31_size */
+		QUAD(init_mm)
+		QUAD(swapper_pg_dir)
+		QUAD(invalid_pg_dir)
+		QUAD(__alt_instructions)
+		QUAD(__alt_instructions_end)
+#ifdef CONFIG_KASAN
+		QUAD(kasan_early_shadow_page)
+		QUAD(kasan_early_shadow_pte)
+		QUAD(kasan_early_shadow_pmd)
+		QUAD(kasan_early_shadow_pud)
+		QUAD(kasan_early_shadow_p4d)
+#endif
 	} :NONE
 
 	/* Debugging sections.	*/
@@ -219,9 +237,32 @@ SECTIONS
 	DWARF_DEBUG
 	ELF_DETAILS
 
+	/*
+	 * Make sure that the .got.plt is either completely empty or it
+	 * contains only the three reserved double words.
+	 */
+	.got.plt : {
+		*(.got.plt)
+	}
+	ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+	/*
+	 * Sections that should stay zero sized, which is safer to
+	 * explicitly check instead of blindly discarding.
+	 */
+	.plt : {
+		*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+	}
+	ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+	.rela.dyn : {
+		*(.rela.*) *(.rela_*)
+	}
+	ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
 	/* Sections to be discarded */
 	DISCARDS
 	/DISCARD/ : {
 		*(.eh_frame)
+		*(.interp)
 	}
 }
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9436f3053b88..234a0ba30510 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -7,13 +7,13 @@
  */
 
 #include <linux/kernel_stat.h>
-#include <linux/sched/cputime.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/timex.h>
 #include <linux/types.h>
 #include <linux/time.h>
 #include <asm/alternative.h>
+#include <asm/cputime.h>
 #include <asm/vtimer.h>
 #include <asm/vtime.h>
 #include <asm/cpu_mf.h>
@@ -33,24 +33,17 @@ static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
 
-static inline u64 get_vtimer(void)
-{
-	u64 timer;
-
-	asm volatile("stpt %0" : "=Q" (timer));
-	return timer;
-}
-
 static inline void set_vtimer(u64 expires)
 {
+	struct lowcore *lc = get_lowcore();
 	u64 timer;
 
 	asm volatile(
 		"	stpt	%0\n"	/* Store current cpu timer value */
 		"	spt	%1"	/* Set new value imm. afterwards */
 		: "=Q" (timer) : "Q" (expires));
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
-	S390_lowcore.last_update_timer = expires;
+	lc->system_timer += lc->last_update_timer - timer;
+	lc->last_update_timer = expires;
 }
 
 static inline int virt_timer_forward(u64 elapsed)
@@ -125,22 +118,23 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
 static int do_account_vtime(struct task_struct *tsk)
 {
 	u64 timer, clock, user, guest, system, hardirq, softirq;
+	struct lowcore *lc = get_lowcore();
 
-	timer = S390_lowcore.last_update_timer;
-	clock = S390_lowcore.last_update_clock;
+	timer = lc->last_update_timer;
+	clock = lc->last_update_clock;
 	asm volatile(
 		"	stpt	%0\n"	/* Store current cpu timer value */
 		"	stckf	%1"	/* Store current tod clock value */
-		: "=Q" (S390_lowcore.last_update_timer),
-		  "=Q" (S390_lowcore.last_update_clock)
+		: "=Q" (lc->last_update_timer),
+		  "=Q" (lc->last_update_clock)
 		: : "cc");
-	clock = S390_lowcore.last_update_clock - clock;
-	timer -= S390_lowcore.last_update_timer;
+	clock = lc->last_update_clock - clock;
+	timer -= lc->last_update_timer;
 
 	if (hardirq_count())
-		S390_lowcore.hardirq_timer += timer;
+		lc->hardirq_timer += timer;
 	else
-		S390_lowcore.system_timer += timer;
+		lc->system_timer += timer;
 
 	/* Update MT utilization calculation */
 	if (smp_cpu_mtid &&
@@ -149,16 +143,16 @@ static int do_account_vtime(struct task_struct *tsk)
 
 	/* Calculate cputime delta */
 	user = update_tsk_timer(&tsk->thread.user_timer,
-				READ_ONCE(S390_lowcore.user_timer));
+				READ_ONCE(lc->user_timer));
 	guest = update_tsk_timer(&tsk->thread.guest_timer,
-				 READ_ONCE(S390_lowcore.guest_timer));
+				 READ_ONCE(lc->guest_timer));
 	system = update_tsk_timer(&tsk->thread.system_timer,
-				  READ_ONCE(S390_lowcore.system_timer));
+				  READ_ONCE(lc->system_timer));
 	hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
-				   READ_ONCE(S390_lowcore.hardirq_timer));
+				   READ_ONCE(lc->hardirq_timer));
 	softirq = update_tsk_timer(&tsk->thread.softirq_timer,
-				   READ_ONCE(S390_lowcore.softirq_timer));
-	S390_lowcore.steal_timer +=
+				   READ_ONCE(lc->softirq_timer));
+	lc->steal_timer +=
 		clock - user - guest - system - hardirq - softirq;
 
 	/* Push account value */
@@ -184,17 +178,19 @@ static int do_account_vtime(struct task_struct *tsk)
 
 void vtime_task_switch(struct task_struct *prev)
 {
+	struct lowcore *lc = get_lowcore();
+
 	do_account_vtime(prev);
-	prev->thread.user_timer = S390_lowcore.user_timer;
-	prev->thread.guest_timer = S390_lowcore.guest_timer;
-	prev->thread.system_timer = S390_lowcore.system_timer;
-	prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
-	prev->thread.softirq_timer = S390_lowcore.softirq_timer;
-	S390_lowcore.user_timer = current->thread.user_timer;
-	S390_lowcore.guest_timer = current->thread.guest_timer;
-	S390_lowcore.system_timer = current->thread.system_timer;
-	S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
-	S390_lowcore.softirq_timer = current->thread.softirq_timer;
+	prev->thread.user_timer = lc->user_timer;
+	prev->thread.guest_timer = lc->guest_timer;
+	prev->thread.system_timer = lc->system_timer;
+	prev->thread.hardirq_timer = lc->hardirq_timer;
+	prev->thread.softirq_timer = lc->softirq_timer;
+	lc->user_timer = current->thread.user_timer;
+	lc->guest_timer = current->thread.guest_timer;
+	lc->system_timer = current->thread.system_timer;
+	lc->hardirq_timer = current->thread.hardirq_timer;
+	lc->softirq_timer = current->thread.softirq_timer;
 }
 
 /*
@@ -204,28 +200,29 @@ void vtime_task_switch(struct task_struct *prev)
  */
 void vtime_flush(struct task_struct *tsk)
 {
+	struct lowcore *lc = get_lowcore();
 	u64 steal, avg_steal;
 
 	if (do_account_vtime(tsk))
 		virt_timer_expire();
 
-	steal = S390_lowcore.steal_timer;
-	avg_steal = S390_lowcore.avg_steal_timer / 2;
+	steal = lc->steal_timer;
+	avg_steal = lc->avg_steal_timer;
 	if ((s64) steal > 0) {
-		S390_lowcore.steal_timer = 0;
+		lc->steal_timer = 0;
 		account_steal_time(cputime_to_nsecs(steal));
 		avg_steal += steal;
 	}
-	S390_lowcore.avg_steal_timer = avg_steal;
+	lc->avg_steal_timer = avg_steal / 2;
 }
 
 static u64 vtime_delta(void)
 {
-	u64 timer = S390_lowcore.last_update_timer;
-
-	S390_lowcore.last_update_timer = get_vtimer();
+	struct lowcore *lc = get_lowcore();
+	u64 timer = lc->last_update_timer;
 
-	return timer - S390_lowcore.last_update_timer;
+	lc->last_update_timer = get_cpu_timer();
+	return timer - lc->last_update_timer;
 }
 
 /*
@@ -234,12 +231,13 @@ static u64 vtime_delta(void)
  */
 void vtime_account_kernel(struct task_struct *tsk)
 {
+	struct lowcore *lc = get_lowcore();
 	u64 delta = vtime_delta();
 
 	if (tsk->flags & PF_VCPU)
-		S390_lowcore.guest_timer += delta;
+		lc->guest_timer += delta;
 	else
-		S390_lowcore.system_timer += delta;
+		lc->system_timer += delta;
 
 	virt_timer_forward(delta);
 }
@@ -249,7 +247,7 @@ void vtime_account_softirq(struct task_struct *tsk)
 {
 	u64 delta = vtime_delta();
 
-	S390_lowcore.softirq_timer += delta;
+	get_lowcore()->softirq_timer += delta;
 
 	virt_timer_forward(delta);
 }
@@ -258,7 +256,7 @@ void vtime_account_hardirq(struct task_struct *tsk)
 {
 	u64 delta = vtime_delta();
 
-	S390_lowcore.hardirq_timer += delta;
+	get_lowcore()->hardirq_timer += delta;
 
 	virt_timer_forward(delta);
 }
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+	unsigned long	missed;
+	unsigned long	addr;
+	pid_t		pid;
+};
+
+struct wti_state {
+	/* debug data for s390dbf */
+	struct wti_debug	dbg;
+	/*
+	 * Represents the real-time thread responsible for
+	 * acknowledging the warning-track interrupt and triggering
+	 * preliminary and postliminary precautions.
+	 */
+	struct task_struct	*thread;
+	/*
+	 * If pending is true, the real-time thread must be scheduled.
+	 * If not, a wake up of that thread will remain a noop.
+	 */
+	bool			pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;
+
+/*
+ * During a warning-track grace period, interrupts are disabled
+ * to prevent delays of the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+	unsigned long flags;
+	struct ctlreg cr6;
+
+	local_irq_save(flags);
+	local_ctl_store(6, &cr6);
+	/* disable all I/O interrupts */
+	cr6.val &= ~0xff000000UL;
+	local_ctl_load(6, &cr6);
+	local_irq_restore(flags);
+}
+
+static void wti_irq_enable(void)
+{
+	unsigned long flags;
+	struct ctlreg cr6;
+
+	local_irq_save(flags);
+	local_ctl_store(6, &cr6);
+	/* enable all I/O interrupts */
+	cr6.val |= 0xff000000UL;
+	local_ctl_load(6, &cr6);
+	local_irq_restore(flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+	struct pt_regs *regs = get_irq_regs();
+
+	st->dbg.pid = current->pid;
+	st->dbg.addr = 0;
+	if (!user_mode(regs))
+		st->dbg.addr = regs->psw.addr;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+			  unsigned int param32, unsigned long param64)
+{
+	struct wti_state *st = this_cpu_ptr(&wti_state);
+
+	inc_irq_stat(IRQEXT_WTI);
+	wti_irq_disable();
+	store_debug_data(st);
+	st->pending = true;
+	wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+	struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+	return st->pending;
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+	struct wti_debug *wdi = &st->dbg;
+	char buf[WTI_DBF_LEN];
+
+	if (wdi->addr)
+		snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr);
+	else
+		snprintf(buf, sizeof(buf), "%d <user>", wdi->pid);
+	debug_text_event(wti_dbg, 2, buf);
+	wdi->missed++;
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+	struct wti_state *st;
+	int cpu;
+
+	cpus_read_lock();
+	seq_puts(seq, "       ");
+	for_each_online_cpu(cpu)
+		seq_printf(seq, "CPU%-8d", cpu);
+	seq_putc(seq, '\n');
+	for_each_online_cpu(cpu) {
+		st = per_cpu_ptr(&wti_state, cpu);
+		seq_printf(seq, " %10lu", st->dbg.missed);
+	}
+	seq_putc(seq, '\n');
+	cpus_read_unlock();
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+	struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+	st->pending = false;
+	/*
+	 * Yield the CPU voluntarily to the hypervisor. Control
+	 * resumes when the hypervisor decides to dispatch the
+	 * CPU to this LPAR again.
+	 */
+	if (diag49c(DIAG49C_SUBC_ACK))
+		wti_dbf_grace_period(st);
+	wti_irq_enable();
+}
+
+static struct smp_hotplug_thread wti_threads = {
+	.store			= &wti_state.thread,
+	.thread_should_run	= wti_pending,
+	.thread_fn		= wti_thread_fn,
+	.thread_comm		= "cpuwti/%u",
+	.selfparking		= false,
+};
+
+static int __init wti_init(void)
+{
+	struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+	struct dentry *wti_dir;
+	struct wti_state *st;
+	int cpu, rc;
+
+	rc = -EOPNOTSUPP;
+	if (!sclp.has_wti)
+		goto out;
+	rc = smpboot_register_percpu_thread(&wti_threads);
+	if (WARN_ON(rc))
+		goto out;
+	for_each_online_cpu(cpu) {
+		st = per_cpu_ptr(&wti_state, cpu);
+		sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+	}
+	rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+	if (rc) {
+		pr_warn("Couldn't request external interrupt 0x1007\n");
+		goto out_thread;
+	}
+	irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+	rc = diag49c(DIAG49C_SUBC_REG);
+	if (rc) {
+		pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+		rc = -EOPNOTSUPP;
+		goto out_subclass;
+	}
+	wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+	debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+	wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+	if (!wti_dbg) {
+		rc = -ENOMEM;
+		goto out_debug_register;
+	}
+	rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+	if (rc)
+		goto out_debug_register;
+	goto out;
+out_debug_register:
+	debug_unregister(wti_dbg);
+out_subclass:
+	irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+	unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+	smpboot_unregister_percpu_thread(&wti_threads);
+out:
+	return rc;
+}
+late_initcall(wti_init);
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 2e84d3922f7c..cae908d64550 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -19,21 +19,17 @@ if VIRTUALIZATION
 config KVM
 	def_tristate y
 	prompt "Kernel-based Virtual Machine (KVM) support"
-	depends on HAVE_KVM
-	select PREEMPT_NOTIFIERS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_VCPU_ASYNC_IOCTL
-	select HAVE_KVM_EVENTFD
 	select KVM_ASYNC_PF
 	select KVM_ASYNC_PF_SYNC
+	select KVM_COMMON
 	select HAVE_KVM_IRQCHIP
-	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_INVALID_WAKEUPS
 	select HAVE_KVM_NO_POLL
-	select SRCU
 	select KVM_VFIO
-	select INTERVAL_TREE
+	select MMU_NOTIFIER
 	help
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 26f4a74e5ce4..9a723c48b05a 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,6 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
 
+kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 807fa9da1e72..53233dec8cad 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,12 +11,30 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
 #include "gaccess.h"
 
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+	struct kvm_memslot_iter iter;
+	struct kvm_memory_slot *slot;
+	struct kvm_memslots *slots;
+	unsigned long start, end;
+
+	slots = kvm_vcpu_memslots(vcpu);
+
+	kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+		slot = iter.slot;
+		start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+		end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+		gmap_helper_discard(vcpu->kvm->mm, start, end);
+	}
+}
+
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
 	unsigned long start, end;
@@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
 	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
 
+	mmap_read_lock(vcpu->kvm->mm);
 	/*
 	 * We checked for start >= end above, so lets check for the
 	 * fast path (no prefix swap page involved)
 	 */
 	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
-		gmap_discard(vcpu->arch.gmap, start, end);
+		do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
 	} else {
 		/*
 		 * This is slow path.  gmap_discard will check for start
@@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 		 * prefix and let gmap_discard make some of these calls
 		 * NOPs.
 		 */
-		gmap_discard(vcpu->arch.gmap, start, prefix);
+		do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
 		if (start <= prefix)
-			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+			do_discard_gfn_range(vcpu, 0, 1);
 		if (end > prefix + PAGE_SIZE)
-			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
-		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+			do_discard_gfn_range(vcpu, 1, 2);
+		do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
 	}
+	mmap_read_unlock(vcpu->kvm->mm);
 	return 0;
 }
 
@@ -77,7 +97,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+	rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
@@ -102,7 +122,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
 			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-		if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+		if (!kvm_is_gpa_in_memslot(vcpu->kvm, parm.token_addr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		vcpu->arch.pfault_token = parm.token_addr;
@@ -166,6 +186,7 @@ static int diag9c_forwarding_overrun(void)
 static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu *tcpu;
+	int tcpu_cpu;
 	int tid;
 
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
@@ -181,14 +202,15 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
 		goto no_yield;
 
 	/* target guest VCPU already running */
-	if (READ_ONCE(tcpu->cpu) >= 0) {
+	tcpu_cpu = READ_ONCE(tcpu->cpu);
+	if (tcpu_cpu >= 0) {
 		if (!diag9c_forwarding_hz || diag9c_forwarding_overrun())
 			goto no_yield;
 
 		/* target host CPU already running */
-		if (!vcpu_is_preempted(tcpu->cpu))
+		if (!vcpu_is_preempted(tcpu_cpu))
 			goto no_yield;
-		smp_yield_cpu(tcpu->cpu);
+		smp_yield_cpu(tcpu_cpu);
 		VCPU_EVENT(vcpu, 5,
 			   "diag time slice end directed to %d: yield forwarded",
 			   tid);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 227ed0009354..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -11,169 +11,14 @@
 #include <linux/err.h>
 #include <linux/pgtable.h>
 #include <linux/bitfield.h>
-
+#include <asm/access-regs.h>
+#include <asm/fault.h>
 #include <asm/gmap.h>
+#include <asm/dat-bits.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
-#include <asm/switch_to.h>
-
-union asce {
-	unsigned long val;
-	struct {
-		unsigned long origin : 52; /* Region- or Segment-Table Origin */
-		unsigned long	 : 2;
-		unsigned long g  : 1; /* Subspace Group Control */
-		unsigned long p  : 1; /* Private Space Control */
-		unsigned long s  : 1; /* Storage-Alteration-Event Control */
-		unsigned long x  : 1; /* Space-Switch-Event Control */
-		unsigned long r  : 1; /* Real-Space Control */
-		unsigned long	 : 1;
-		unsigned long dt : 2; /* Designation-Type Control */
-		unsigned long tl : 2; /* Region- or Segment-Table Length */
-	};
-};
-
-enum {
-	ASCE_TYPE_SEGMENT = 0,
-	ASCE_TYPE_REGION3 = 1,
-	ASCE_TYPE_REGION2 = 2,
-	ASCE_TYPE_REGION1 = 3
-};
 
-union region1_table_entry {
-	unsigned long val;
-	struct {
-		unsigned long rto: 52;/* Region-Table Origin */
-		unsigned long	 : 2;
-		unsigned long p  : 1; /* DAT-Protection Bit */
-		unsigned long	 : 1;
-		unsigned long tf : 2; /* Region-Second-Table Offset */
-		unsigned long i  : 1; /* Region-Invalid Bit */
-		unsigned long	 : 1;
-		unsigned long tt : 2; /* Table-Type Bits */
-		unsigned long tl : 2; /* Region-Second-Table Length */
-	};
-};
-
-union region2_table_entry {
-	unsigned long val;
-	struct {
-		unsigned long rto: 52;/* Region-Table Origin */
-		unsigned long	 : 2;
-		unsigned long p  : 1; /* DAT-Protection Bit */
-		unsigned long	 : 1;
-		unsigned long tf : 2; /* Region-Third-Table Offset */
-		unsigned long i  : 1; /* Region-Invalid Bit */
-		unsigned long	 : 1;
-		unsigned long tt : 2; /* Table-Type Bits */
-		unsigned long tl : 2; /* Region-Third-Table Length */
-	};
-};
-
-struct region3_table_entry_fc0 {
-	unsigned long sto: 52;/* Segment-Table Origin */
-	unsigned long	 : 1;
-	unsigned long fc : 1; /* Format-Control */
-	unsigned long p  : 1; /* DAT-Protection Bit */
-	unsigned long	 : 1;
-	unsigned long tf : 2; /* Segment-Table Offset */
-	unsigned long i  : 1; /* Region-Invalid Bit */
-	unsigned long cr : 1; /* Common-Region Bit */
-	unsigned long tt : 2; /* Table-Type Bits */
-	unsigned long tl : 2; /* Segment-Table Length */
-};
-
-struct region3_table_entry_fc1 {
-	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
-	unsigned long	 : 14;
-	unsigned long av : 1; /* ACCF-Validity Control */
-	unsigned long acc: 4; /* Access-Control Bits */
-	unsigned long f  : 1; /* Fetch-Protection Bit */
-	unsigned long fc : 1; /* Format-Control */
-	unsigned long p  : 1; /* DAT-Protection Bit */
-	unsigned long iep: 1; /* Instruction-Execution-Protection */
-	unsigned long	 : 2;
-	unsigned long i  : 1; /* Region-Invalid Bit */
-	unsigned long cr : 1; /* Common-Region Bit */
-	unsigned long tt : 2; /* Table-Type Bits */
-	unsigned long	 : 2;
-};
-
-union region3_table_entry {
-	unsigned long val;
-	struct region3_table_entry_fc0 fc0;
-	struct region3_table_entry_fc1 fc1;
-	struct {
-		unsigned long	 : 53;
-		unsigned long fc : 1; /* Format-Control */
-		unsigned long	 : 4;
-		unsigned long i  : 1; /* Region-Invalid Bit */
-		unsigned long cr : 1; /* Common-Region Bit */
-		unsigned long tt : 2; /* Table-Type Bits */
-		unsigned long	 : 2;
-	};
-};
-
-struct segment_entry_fc0 {
-	unsigned long pto: 53;/* Page-Table Origin */
-	unsigned long fc : 1; /* Format-Control */
-	unsigned long p  : 1; /* DAT-Protection Bit */
-	unsigned long	 : 3;
-	unsigned long i  : 1; /* Segment-Invalid Bit */
-	unsigned long cs : 1; /* Common-Segment Bit */
-	unsigned long tt : 2; /* Table-Type Bits */
-	unsigned long	 : 2;
-};
-
-struct segment_entry_fc1 {
-	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
-	unsigned long	 : 3;
-	unsigned long av : 1; /* ACCF-Validity Control */
-	unsigned long acc: 4; /* Access-Control Bits */
-	unsigned long f  : 1; /* Fetch-Protection Bit */
-	unsigned long fc : 1; /* Format-Control */
-	unsigned long p  : 1; /* DAT-Protection Bit */
-	unsigned long iep: 1; /* Instruction-Execution-Protection */
-	unsigned long	 : 2;
-	unsigned long i  : 1; /* Segment-Invalid Bit */
-	unsigned long cs : 1; /* Common-Segment Bit */
-	unsigned long tt : 2; /* Table-Type Bits */
-	unsigned long	 : 2;
-};
-
-union segment_table_entry {
-	unsigned long val;
-	struct segment_entry_fc0 fc0;
-	struct segment_entry_fc1 fc1;
-	struct {
-		unsigned long	 : 53;
-		unsigned long fc : 1; /* Format-Control */
-		unsigned long	 : 4;
-		unsigned long i  : 1; /* Segment-Invalid Bit */
-		unsigned long cs : 1; /* Common-Segment Bit */
-		unsigned long tt : 2; /* Table-Type Bits */
-		unsigned long	 : 2;
-	};
-};
-
-enum {
-	TABLE_TYPE_SEGMENT = 0,
-	TABLE_TYPE_REGION3 = 1,
-	TABLE_TYPE_REGION2 = 2,
-	TABLE_TYPE_REGION1 = 3
-};
-
-union page_table_entry {
-	unsigned long val;
-	struct {
-		unsigned long pfra : 52; /* Page-Frame Real Address */
-		unsigned long z  : 1; /* Zero Bit */
-		unsigned long i  : 1; /* Page-Invalid Bit */
-		unsigned long p  : 1; /* DAT-Protection Bit */
-		unsigned long iep: 1; /* Instruction-Execution-Protection */
-		unsigned long	 : 8;
-	};
-};
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
 
 /*
  * vaddress union in order to easily decode a virtual address into its
@@ -262,119 +107,119 @@ struct aste {
 	/* .. more fields there */
 };
 
-int ipte_lock_held(struct kvm_vcpu *vcpu)
+int ipte_lock_held(struct kvm *kvm)
 {
-	if (vcpu->arch.sie_block->eca & ECA_SII) {
+	if (sclp.has_siif) {
 		int rc;
 
-		read_lock(&vcpu->kvm->arch.sca_lock);
-		rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
-		read_unlock(&vcpu->kvm->arch.sca_lock);
+		read_lock(&kvm->arch.sca_lock);
+		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
+		read_unlock(&kvm->arch.sca_lock);
 		return rc;
 	}
-	return vcpu->kvm->arch.ipte_lock_count != 0;
+	return kvm->arch.ipte_lock_count != 0;
 }
 
-static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+static void ipte_lock_simple(struct kvm *kvm)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
-	vcpu->kvm->arch.ipte_lock_count++;
-	if (vcpu->kvm->arch.ipte_lock_count > 1)
+	mutex_lock(&kvm->arch.ipte_mutex);
+	kvm->arch.ipte_lock_count++;
+	if (kvm->arch.ipte_lock_count > 1)
 		goto out;
 retry:
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(vcpu->kvm);
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		if (old.k) {
-			read_unlock(&vcpu->kvm->arch.sca_lock);
+			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
 			goto retry;
 		}
 		new = old;
 		new.k = 1;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
-	read_unlock(&vcpu->kvm->arch.sca_lock);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
+	read_unlock(&kvm->arch.sca_lock);
 out:
-	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+	mutex_unlock(&kvm->arch.ipte_mutex);
 }
 
-static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+static void ipte_unlock_simple(struct kvm *kvm)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
-	vcpu->kvm->arch.ipte_lock_count--;
-	if (vcpu->kvm->arch.ipte_lock_count)
+	mutex_lock(&kvm->arch.ipte_mutex);
+	kvm->arch.ipte_lock_count--;
+	if (kvm->arch.ipte_lock_count)
 		goto out;
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(vcpu->kvm);
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		new = old;
 		new.k = 0;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
-	read_unlock(&vcpu->kvm->arch.sca_lock);
-	wake_up(&vcpu->kvm->arch.ipte_wq);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
+	read_unlock(&kvm->arch.sca_lock);
+	wake_up(&kvm->arch.ipte_wq);
 out:
-	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+	mutex_unlock(&kvm->arch.ipte_mutex);
 }
 
-static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+static void ipte_lock_siif(struct kvm *kvm)
 {
 	union ipte_control old, new, *ic;
 
 retry:
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(vcpu->kvm);
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		if (old.kg) {
-			read_unlock(&vcpu->kvm->arch.sca_lock);
+			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
 			goto retry;
 		}
 		new = old;
 		new.k = 1;
 		new.kh++;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
-	read_unlock(&vcpu->kvm->arch.sca_lock);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
+	read_unlock(&kvm->arch.sca_lock);
 }
 
-static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+static void ipte_unlock_siif(struct kvm *kvm)
 {
 	union ipte_control old, new, *ic;
 
-	read_lock(&vcpu->kvm->arch.sca_lock);
-	ic = kvm_s390_get_ipte_control(vcpu->kvm);
+	read_lock(&kvm->arch.sca_lock);
+	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
-	read_unlock(&vcpu->kvm->arch.sca_lock);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
+	read_unlock(&kvm->arch.sca_lock);
 	if (!new.kh)
-		wake_up(&vcpu->kvm->arch.ipte_wq);
+		wake_up(&kvm->arch.ipte_wq);
 }
 
-void ipte_lock(struct kvm_vcpu *vcpu)
+void ipte_lock(struct kvm *kvm)
 {
-	if (vcpu->arch.sie_block->eca & ECA_SII)
-		ipte_lock_siif(vcpu);
+	if (sclp.has_siif)
+		ipte_lock_siif(kvm);
 	else
-		ipte_lock_simple(vcpu);
+		ipte_lock_simple(kvm);
 }
 
-void ipte_unlock(struct kvm_vcpu *vcpu)
+void ipte_unlock(struct kvm *kvm)
 {
-	if (vcpu->arch.sie_block->eca & ECA_SII)
-		ipte_unlock_siif(vcpu);
+	if (sclp.has_siif)
+		ipte_unlock_siif(kvm);
 	else
-		ipte_unlock_simple(vcpu);
+		ipte_unlock_simple(kvm);
 }
 
 static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
@@ -391,7 +236,8 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
 	if (ar >= NUM_ACRS)
 		return -EINVAL;
 
-	save_access_regs(vcpu->run->s.regs.acrs);
+	if (vcpu->arch.acrs_loaded)
+		save_access_regs(vcpu->run->s.regs.acrs);
 	alet.val = vcpu->run->s.regs.acrs[ar];
 
 	if (ar == 0 || alet.val == 0) {
@@ -466,64 +312,53 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
 	return 0;
 }
 
-struct trans_exc_code_bits {
-	unsigned long addr : 52; /* Translation-exception Address */
-	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
-	unsigned long	   : 2;
-	unsigned long b56  : 1;
-	unsigned long	   : 3;
-	unsigned long b60  : 1;
-	unsigned long b61  : 1;
-	unsigned long as   : 2;  /* ASCE Identifier */
-};
-
-enum {
-	FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
-	FSI_STORE   = 1, /* Exception was due to store operation */
-	FSI_FETCH   = 2  /* Exception was due to fetch operation */
-};
-
 enum prot_type {
 	PROT_TYPE_LA   = 0,
 	PROT_TYPE_KEYC = 1,
 	PROT_TYPE_ALC  = 2,
 	PROT_TYPE_DAT  = 3,
 	PROT_TYPE_IEP  = 4,
+	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+	PROT_TYPE_DUMMY,
 };
 
 static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
 			    enum gacc_mode mode, enum prot_type prot, bool terminate)
 {
 	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
-	struct trans_exc_code_bits *tec;
+	union teid *teid;
 
 	memset(pgm, 0, sizeof(*pgm));
 	pgm->code = code;
-	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	teid = (union teid *)&pgm->trans_exc_code;
 
 	switch (code) {
 	case PGM_PROTECTION:
 		switch (prot) {
+		case PROT_TYPE_DUMMY:
+			/* We should never get here, acts like termination */
+			WARN_ON_ONCE(1);
+			break;
 		case PROT_TYPE_IEP:
-			tec->b61 = 1;
+			teid->b61 = 1;
 			fallthrough;
 		case PROT_TYPE_LA:
-			tec->b56 = 1;
+			teid->b56 = 1;
 			break;
 		case PROT_TYPE_KEYC:
-			tec->b60 = 1;
+			teid->b60 = 1;
 			break;
 		case PROT_TYPE_ALC:
-			tec->b60 = 1;
+			teid->b60 = 1;
 			fallthrough;
 		case PROT_TYPE_DAT:
-			tec->b61 = 1;
+			teid->b61 = 1;
 			break;
 		}
 		if (terminate) {
-			tec->b56 = 0;
-			tec->b60 = 0;
-			tec->b61 = 0;
+			teid->b56 = 0;
+			teid->b60 = 0;
+			teid->b61 = 0;
 		}
 		fallthrough;
 	case PGM_ASCE_TYPE:
@@ -537,9 +372,9 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
 		 * exc_access_id has to be set to 0 for some instructions. Both
 		 * cases have to be handled by the caller.
 		 */
-		tec->addr = gva >> PAGE_SHIFT;
-		tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
-		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+		teid->addr = gva >> PAGE_SHIFT;
+		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
+		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
 		fallthrough;
 	case PGM_ALEN_TRANSLATION:
 	case PGM_ALE_SEQUENCE:
@@ -619,7 +454,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  * Returns: - zero on success; @gpa contains the resulting absolute address
  *	    - a negative value if guest access failed due to e.g. broken
  *	      guest mapping
- *	    - a positve value if an access exception happened. In this case
+ *	    - a positive value if an access exception happened. In this case
  *	      the returned value is the program interruption code as defined
  *	      by the architecture
  */
@@ -642,7 +477,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
 	if (asce.r)
 		goto real_address;
-	ptr = asce.origin * PAGE_SIZE;
+	ptr = asce.rsto * PAGE_SIZE;
 	switch (asce.dt) {
 	case ASCE_TYPE_REGION1:
 		if (vaddr.rfx01 > asce.tl)
@@ -675,7 +510,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	case ASCE_TYPE_REGION1:	{
 		union region1_table_entry rfte;
 
-		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
 			return PGM_ADDRESSING;
 		if (deref_table(vcpu->kvm, ptr, &rfte.val))
 			return -EFAULT;
@@ -693,7 +528,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	case ASCE_TYPE_REGION2: {
 		union region2_table_entry rste;
 
-		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
 			return PGM_ADDRESSING;
 		if (deref_table(vcpu->kvm, ptr, &rste.val))
 			return -EFAULT;
@@ -711,7 +546,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	case ASCE_TYPE_REGION3: {
 		union region3_table_entry rtte;
 
-		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
 			return PGM_ADDRESSING;
 		if (deref_table(vcpu->kvm, ptr, &rtte.val))
 			return -EFAULT;
@@ -739,7 +574,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 	case ASCE_TYPE_SEGMENT: {
 		union segment_table_entry ste;
 
-		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
 			return PGM_ADDRESSING;
 		if (deref_table(vcpu->kvm, ptr, &ste.val))
 			return -EFAULT;
@@ -759,7 +594,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
 	}
 	}
-	if (kvm_is_error_gpa(vcpu->kvm, ptr))
+	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
 		return PGM_ADDRESSING;
 	if (deref_table(vcpu->kvm, ptr, &pte.val))
 		return -EFAULT;
@@ -781,7 +616,7 @@ absolute_address:
 		*prot = PROT_TYPE_IEP;
 		return PGM_PROTECTION;
 	}
-	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
 		return PGM_ADDRESSING;
 	*gpa = raddr.addr;
 	return 0;
@@ -968,8 +803,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 				return rc;
 		} else {
 			gpa = kvm_s390_real_to_abs(vcpu, ga);
-			if (kvm_is_error_gpa(vcpu->kvm, gpa))
+			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
 				rc = PGM_ADDRESSING;
+				prot = PROT_TYPE_DUMMY;
+			}
 		}
 		if (rc)
 			return trans_exc(vcpu, rc, ga, ar, mode, prot);
@@ -993,6 +830,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
 	const gfn_t gfn = gpa_to_gfn(gpa);
 	int rc;
 
+	if (!gfn_to_memslot(kvm, gfn))
+		return PGM_ADDRESSING;
 	if (mode == GACC_STORE)
 		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
 	else
@@ -1086,7 +925,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 	try_storage_prot_override = storage_prot_override_applicable(vcpu);
 	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
 	if (need_ipte_lock)
-		ipte_lock(vcpu);
+		ipte_lock(vcpu->kvm);
 	/*
 	 * Since we do the access further down ultimately via a move instruction
 	 * that does key checking and returns an error in case of a protection
@@ -1112,8 +951,6 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 		if (rc == PGM_PROTECTION && try_storage_prot_override)
 			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
 							data, fragment_len, PAGE_SPO_ACC);
-		if (rc == PGM_PROTECTION)
-			prot = PROT_TYPE_KEYC;
 		if (rc)
 			break;
 		len -= fragment_len;
@@ -1123,11 +960,15 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 	if (rc > 0) {
 		bool terminate = (mode == GACC_STORE) && (idx > 0);
 
+		if (rc == PGM_PROTECTION)
+			prot = PROT_TYPE_KEYC;
+		else
+			prot = PROT_TYPE_DUMMY;
 		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
 	}
 out_unlock:
 	if (need_ipte_lock)
-		ipte_unlock(vcpu);
+		ipte_unlock(vcpu->kvm);
 	if (nr_pages > ARRAY_SIZE(gpa_array))
 		vfree(gpas);
 	return rc;
@@ -1148,10 +989,121 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 		gra += fragment_len;
 		data += fragment_len;
 	}
+	if (rc > 0)
+		vcpu->arch.pgm.code = rc;
 	return rc;
 }
 
 /**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ *       non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ *            the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ *            *@old_addr contains the value at @gpa before the attempt to
+ *            exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old_addr.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ *         * >0: a program interruption code indicating the reason cmpxchg could
+ *               not be attempted
+ *         * -EINVAL: address misaligned or len not power of two
+ *         * -EAGAIN: transient failure (len 1 or 2)
+ *         * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+			       __uint128_t *old_addr, __uint128_t new,
+			       u8 access_key, bool *success)
+{
+	gfn_t gfn = gpa_to_gfn(gpa);
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	bool writable;
+	hva_t hva;
+	int ret;
+
+	if (!IS_ALIGNED(gpa, len))
+		return -EINVAL;
+
+	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+	if (kvm_is_error_hva(hva))
+		return PGM_ADDRESSING;
+	/*
+	 * Check if it's a read-only memslot, even though that cannot occur
+	 * since those are unsupported.
+	 * Don't try to actually handle that case.
+	 */
+	if (!writable)
+		return -EOPNOTSUPP;
+
+	hva += offset_in_page(gpa);
+	/*
+	 * The cmpxchg_user_key macro depends on the type of "old", so we need
+	 * a case for each valid length and get some code duplication as long
+	 * as we don't introduce a new macro.
+	 */
+	switch (len) {
+	case 1: {
+		u8 old;
+
+		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 2: {
+		u16 old;
+
+		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 4: {
+		u32 old;
+
+		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 8: {
+		u64 old;
+
+		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	case 16: {
+		__uint128_t old;
+
+		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+		*success = !ret && old == *old_addr;
+		*old_addr = old;
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+	if (*success)
+		mark_page_dirty_in_slot(kvm, slot, gfn);
+	/*
+	 * Assume that the fault is caused by protection, either key protection
+	 * or user page write protection.
+	 */
+	if (ret == -EFAULT)
+		ret = PGM_PROTECTION;
+	return ret;
+}
+
+/**
  * guest_translate_address_with_key - translate guest logical into guest absolute address
  * @vcpu: virtual cpu
  * @gva: Guest virtual address
@@ -1199,10 +1151,10 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
 	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
 	if (rc)
 		return rc;
-	ipte_lock(vcpu);
+	ipte_lock(vcpu->kvm);
 	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
 				 access_key);
-	ipte_unlock(vcpu);
+	ipte_unlock(vcpu->kvm);
 
 	return rc;
 }
@@ -1263,6 +1215,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 				  unsigned long *pgt, int *dat_protection,
 				  int *fake)
 {
+	struct kvm *kvm;
 	struct gmap *parent;
 	union asce asce;
 	union vaddress vaddr;
@@ -1271,10 +1224,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 
 	*fake = 0;
 	*dat_protection = 0;
+	kvm = sg->private;
 	parent = sg->parent;
 	vaddr.addr = saddr;
 	asce.val = sg->orig_asce;
-	ptr = asce.origin * PAGE_SIZE;
+	ptr = asce.rsto * PAGE_SIZE;
 	if (asce.r) {
 		*fake = 1;
 		ptr = 0;
@@ -1331,6 +1285,7 @@ shadow_r2t:
 		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
 		if (rc)
 			return rc;
+		kvm->stat.gmap_shadow_r1_entry++;
 	}
 		fallthrough;
 	case ASCE_TYPE_REGION2: {
@@ -1359,6 +1314,7 @@ shadow_r3t:
 		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
 		if (rc)
 			return rc;
+		kvm->stat.gmap_shadow_r2_entry++;
 	}
 		fallthrough;
 	case ASCE_TYPE_REGION3: {
@@ -1396,6 +1352,7 @@ shadow_sgt:
 		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
 		if (rc)
 			return rc;
+		kvm->stat.gmap_shadow_r3_entry++;
 	}
 		fallthrough;
 	case ASCE_TYPE_SEGMENT: {
@@ -1429,6 +1386,7 @@ shadow_pgt:
 		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
 		if (rc)
 			return rc;
+		kvm->stat.gmap_shadow_sg_entry++;
 	}
 	}
 	/* Return the parent address of the page table */
@@ -1437,6 +1395,44 @@ shadow_pgt:
 }
 
 /**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow guest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+			     int *dat_protection, int *fake)
+{
+	unsigned long pt_index;
+	unsigned long *table;
+	struct page *page;
+	int rc;
+
+	spin_lock(&sg->guest_table_lock);
+	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+		/* Shadow page tables are full pages (pte+pgste) */
+		page = pfn_to_page(*table >> PAGE_SHIFT);
+		pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+		*pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+		*fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+		rc = 0;
+	} else  {
+		rc = -EAGAIN;
+	}
+	spin_unlock(&sg->guest_table_lock);
+	return rc;
+}
+
+/**
  * kvm_s390_shadow_fault - handle fault on a shadow page table
  * @vcpu: virtual cpu
  * @sg: pointer to the shadow guest address space structure
@@ -1459,15 +1455,18 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 	int dat_protection, fake;
 	int rc;
 
+	if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+		return -EFAULT;
+
 	mmap_read_lock(sg->mm);
 	/*
 	 * We don't want any guest-2 tables to change - so the parent
 	 * tables/pointers we read stay valid - unshadowing is however
 	 * always possible - only guest_table_lock protects us.
 	 */
-	ipte_lock(vcpu);
+	ipte_lock(vcpu->kvm);
 
-	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+	rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
 	if (rc)
 		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
 					    &fake);
@@ -1499,7 +1498,8 @@ shadow_page:
 	pte.p |= dat_protection;
 	if (!rc)
 		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
-	ipte_unlock(vcpu);
+	vcpu->kvm->stat.gmap_shadow_pg_entry++;
+	ipte_unlock(vcpu->kvm);
 	mmap_read_unlock(sg->mm);
 	return rc;
 }
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 1124ff282012..3fde45a151f2 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -206,6 +206,9 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 		      void *data, unsigned long len, enum gacc_mode mode);
 
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old,
+			       __uint128_t new, u8 access_key, bool *success);
+
 /**
  * write_guest_with_key - copy data from kernel space to guest space
  * @vcpu: virtual cpu
@@ -402,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
  * @len: number of bytes to copy
  *
  * Copy @len bytes from @data (kernel space) to @gra (guest real address).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
  * Guest low address and key protection are not checked.
  *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying from @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
  *
  * If an error occurs data may have been copied partially to guest memory.
  */
@@ -425,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
  * @len: number of bytes to copy
  *
  * Copy @len bytes from @gra (guest real address) to @data (kernel space).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
  * Guest key protection is not checked.
  *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying to @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
  *
  * If an error occurs data may have been copied partially to kernel space.
  */
@@ -440,9 +445,9 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 	return access_guest_real(vcpu, gra, data, len, 0);
 }
 
-void ipte_lock(struct kvm_vcpu *vcpu);
-void ipte_unlock(struct kvm_vcpu *vcpu);
-int ipte_lock_held(struct kvm_vcpu *vcpu);
+void ipte_lock(struct kvm *kvm);
+void ipte_unlock(struct kvm *kvm);
+int ipte_lock_held(struct kvm *kvm);
 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
 /* MVPG PEI indication bits */
diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c
new file mode 100644
index 000000000000..56ef153eb8fe
--- /dev/null
+++ b/arch/s390/kvm/gmap-vsie.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390 nested VMs.
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *               Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *               David Hildenbrand <david@redhat.com>
+ *               Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+#include <linux/mman.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "kvm-s390.h"
+
+/**
+ * gmap_find_shadow - find a specific asce in the list of shadow tables
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns the pointer to a gmap if a shadow table with the given asce is
+ * already available, ERR_PTR(-EAGAIN) if another one is just being created,
+ * otherwise NULL
+ *
+ * Context: Called with parent->shadow_lock held
+ */
+static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+	struct gmap *sg;
+
+	lockdep_assert_held(&parent->shadow_lock);
+	list_for_each_entry(sg, &parent->children, list) {
+		if (!gmap_shadow_valid(sg, asce, edat_level))
+			continue;
+		if (!sg->initialized)
+			return ERR_PTR(-EAGAIN);
+		refcount_inc(&sg->ref_count);
+		return sg;
+	}
+	return NULL;
+}
+
+/**
+ * gmap_shadow - create/find a shadow guest address space
+ * @parent: pointer to the parent gmap
+ * @asce: ASCE for which the shadow table is created
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * The pages of the top level page table referred by the asce parameter
+ * will be set to read-only and marked in the PGSTEs of the kvm process.
+ * The shadow table will be removed automatically on any change to the
+ * PTE mapping for the source table.
+ *
+ * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
+ * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
+ * parent gmap table could not be protected.
+ */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level)
+{
+	struct gmap *sg, *new;
+	unsigned long limit;
+	int rc;
+
+	if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) ||
+	    KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private))
+		return ERR_PTR(-EFAULT);
+	spin_lock(&parent->shadow_lock);
+	sg = gmap_find_shadow(parent, asce, edat_level);
+	spin_unlock(&parent->shadow_lock);
+	if (sg)
+		return sg;
+	/* Create a new shadow gmap */
+	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
+	if (asce & _ASCE_REAL_SPACE)
+		limit = -1UL;
+	new = gmap_alloc(limit);
+	if (!new)
+		return ERR_PTR(-ENOMEM);
+	new->mm = parent->mm;
+	new->parent = gmap_get(parent);
+	new->private = parent->private;
+	new->orig_asce = asce;
+	new->edat_level = edat_level;
+	new->initialized = false;
+	spin_lock(&parent->shadow_lock);
+	/* Recheck if another CPU created the same shadow */
+	sg = gmap_find_shadow(parent, asce, edat_level);
+	if (sg) {
+		spin_unlock(&parent->shadow_lock);
+		gmap_free(new);
+		return sg;
+	}
+	if (asce & _ASCE_REAL_SPACE) {
+		/* only allow one real-space gmap shadow */
+		list_for_each_entry(sg, &parent->children, list) {
+			if (sg->orig_asce & _ASCE_REAL_SPACE) {
+				spin_lock(&sg->guest_table_lock);
+				gmap_unshadow(sg);
+				spin_unlock(&sg->guest_table_lock);
+				list_del(&sg->list);
+				gmap_put(sg);
+				break;
+			}
+		}
+	}
+	refcount_set(&new->ref_count, 2);
+	list_add(&new->list, &parent->children);
+	if (asce & _ASCE_REAL_SPACE) {
+		/* nothing to protect, return right away */
+		new->initialized = true;
+		spin_unlock(&parent->shadow_lock);
+		return new;
+	}
+	spin_unlock(&parent->shadow_lock);
+	/* protect after insertion, so it will get properly invalidated */
+	mmap_read_lock(parent->mm);
+	rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN,
+				      ((asce & _ASCE_TABLE_LENGTH) + 1),
+				      PROT_READ, GMAP_NOTIFY_SHADOW);
+	mmap_read_unlock(parent->mm);
+	spin_lock(&parent->shadow_lock);
+	new->initialized = true;
+	if (rc) {
+		list_del(&new->list);
+		gmap_free(new);
+		new = ERR_PTR(rc);
+	}
+	spin_unlock(&parent->shadow_lock);
+	return new;
+}
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3765c4223bf9..80879fc73c90 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -213,8 +213,8 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
 	else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
 		return -EINVAL;
 
-	bp_data = memdup_user(dbg->arch.hw_bp,
-			      sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+	bp_data = memdup_array_user(dbg->arch.hw_bp, dbg->arch.nr_hw_bp,
+				    sizeof(*bp_data));
 	if (IS_ERR(bp_data))
 		return PTR_ERR(bp_data);
 
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8bd42a20d924..c7908950c1f4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -94,7 +94,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.exit_validity++;
 	trace_kvm_s390_intercept_validity(vcpu, viwhy);
-	KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+	KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy,
 		  current->pid, vcpu->kvm);
 
 	/* do not warn on invalid runtime instrumentation mode */
@@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 		return 0;
 	if (current->thread.per_flags & PER_FLAG_NO_TE)
 		return 0;
-	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+	itdb = phys_to_virt(vcpu->arch.sie_block->itdba);
 	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
 	if (rc)
 		return rc;
@@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 
 #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
 
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+	if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+		return false;
+	if (guestdbg_sstep_enabled(vcpu) &&
+	    vcpu->arch.sie_block->iprcc != PGM_PER) {
+		/*
+		 * __vcpu_run() will exit after delivering the concurrently
+		 * indicated condition.
+		 */
+		return false;
+	}
+	return true;
+}
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
 	psw_t psw;
@@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
 	if (kvm_s390_pv_cpu_is_protected(vcpu))
 		return -EOPNOTSUPP;
 
-	if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+	if (should_handle_per_event(vcpu)) {
 		rc = kvm_s390_handle_per_event(vcpu);
 		if (rc)
 			return rc;
@@ -271,10 +286,18 @@ static int handle_prog(struct kvm_vcpu *vcpu)
  * handle_external_interrupt - used for external interruption interceptions
  * @vcpu: virtual cpu
  *
- * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
- * the new PSW does not have external interrupts disabled. In the first case,
- * we've got to deliver the interrupt manually, and in the second case, we
- * drop to userspace to handle the situation there.
+ * This interception occurs if:
+ * - the CPUSTAT_EXT_INT bit was already set when the external interrupt
+ *   occurred. In this case, the interrupt needs to be injected manually to
+ *   preserve interrupt priority.
+ * - the external new PSW has external interrupts enabled, which will cause an
+ *   interruption loop. We drop to userspace in this case.
+ *
+ * The latter case can be detected by inspecting the external mask bit in the
+ * external new psw.
+ *
+ * Under PV, only the latter case can occur, since interrupt priorities are
+ * handled in the ultravisor.
  */
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
@@ -285,10 +308,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.exit_external_interrupt++;
 
-	rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
-	if (rc)
-		return rc;
-	/* We can not handle clock comparator or timer interrupt with bad PSW */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		newpsw = vcpu->arch.sie_block->gpsw;
+	} else {
+		rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+		if (rc)
+			return rc;
+	}
+
+	/*
+	 * Clock comparator or timer interrupt with external interrupt enabled
+	 * will cause an interrupt loop. Drop to userspace.
+	 */
 	if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
 	    (newpsw.mask & PSW_MASK_EXT))
 		return -EOPNOTSUPP;
@@ -336,7 +367,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 					      reg2, &srcaddr, GACC_FETCH, 0);
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
-	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+	rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0);
 	if (rc != 0)
 		return rc;
 
@@ -345,7 +376,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 					      reg1, &dstaddr, GACC_STORE, 0);
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
-	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+	rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE);
 	if (rc != 0)
 		return rc;
 
@@ -373,8 +404,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
  */
 int handle_sthyi(struct kvm_vcpu *vcpu)
 {
-	int reg1, reg2, r = 0;
-	u64 code, addr, cc = 0, rc = 0;
+	int reg1, reg2, cc = 0, r = 0;
+	u64 code, addr, rc = 0;
 	struct sthyi_sctns *sctns = NULL;
 
 	if (!test_kvm_facility(vcpu->kvm, 74))
@@ -405,12 +436,14 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 		return -ENOMEM;
 
 	cc = sthyi_fill(sctns, &rc);
-
+	if (cc < 0) {
+		free_page((unsigned long)sctns);
+		return cc;
+	}
 out:
 	if (!cc) {
 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
-			memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
-			       sctns, PAGE_SIZE);
+			memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE);
 		} else {
 			r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
 			if (r) {
@@ -464,7 +497,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
 
 static int handle_pv_spx(struct kvm_vcpu *vcpu)
 {
-	u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+	u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block);
 
 	kvm_s390_set_prefix(vcpu, pref);
 	trace_kvm_s390_handle_prefix(vcpu, 1, pref);
@@ -497,7 +530,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu)
 
 static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 {
-	struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+	struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block);
 	struct uv_cb_cts uvcb = {
 		.header.cmd	= UVC_CMD_UNPIN_PAGE_SHARED,
 		.header.len	= sizeof(uvcb),
@@ -511,12 +544,12 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 			  guest_uvcb->header.cmd);
 		return 0;
 	}
-	rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+	rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
 	/*
 	 * If the unpin did not succeed, the guest will exit again for the UVC
 	 * and we will retry the unpin.
 	 */
-	if (rc == -EINVAL)
+	if (rc == -EINVAL || rc == -ENXIO)
 		return 0;
 	/*
 	 * If we got -EAGAIN here, we simply return it. It will eventually
@@ -528,16 +561,44 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 
 static int handle_pv_notification(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	if (vcpu->arch.sie_block->ipa == 0xb210)
 		return handle_pv_spx(vcpu);
 	if (vcpu->arch.sie_block->ipa == 0xb220)
 		return handle_pv_sclp(vcpu);
 	if (vcpu->arch.sie_block->ipa == 0xb9a4)
 		return handle_pv_uvc(vcpu);
+	if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
+		/*
+		 * Besides external call, other SIGP orders also cause a
+		 * 108 (pv notify) intercept. In contrast to external call,
+		 * these orders need to be emulated and hence the appropriate
+		 * place to handle them is in handle_instruction().
+		 * So first try kvm_s390_handle_sigp_pei() and if that isn't
+		 * successful, go on with handle_instruction().
+		 */
+		ret = kvm_s390_handle_sigp_pei(vcpu);
+		if (!ret)
+			return ret;
+	}
 
 	return handle_instruction(vcpu);
 }
 
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+	/* Process PER, also if the instruction is processed in user space. */
+	if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+		return false;
+	if (rc != 0 && rc != -EOPNOTSUPP)
+		return false;
+	if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+		/* __vcpu_run() will exit after delivering the interrupt. */
+		return false;
+	return true;
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
 	int rc, per_rc = 0;
@@ -572,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		rc = handle_partial_execution(vcpu);
 		break;
 	case ICPT_KSS:
-		rc = kvm_s390_skey_check_enable(vcpu);
-		break;
+		/* Instruction will be redriven, skip the PER check. */
+		return kvm_s390_skey_check_enable(vcpu);
 	case ICPT_MCHKREQ:
 	case ICPT_INT_ENABLE:
 		/*
@@ -591,18 +652,14 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 		break;
 	case ICPT_PV_PREF:
 		rc = 0;
-		gmap_convert_to_secure(vcpu->arch.gmap,
-				       kvm_s390_get_prefix(vcpu));
-		gmap_convert_to_secure(vcpu->arch.gmap,
-				       kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+		kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+		kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
-	/* process PER, also if the instrution is processed in user space */
-	if (vcpu->arch.sie_block->icptstatus & 0x02 &&
-	    (!rc || rc == -EOPNOTSUPP))
+	if (should_handle_per_ifetch(vcpu, rc))
 		per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
 	return per_rc ? per_rc : rc;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index af96dc0549a4..60c360c18690 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -10,6 +10,7 @@
 #define KMSG_COMPONENT "kvm-s390"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/cpufeature.h>
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/hrtimer.h>
@@ -19,18 +20,20 @@
 #include <linux/slab.h>
 #include <linux/bitmap.h>
 #include <linux/vmalloc.h>
+#include <asm/access-regs.h>
 #include <asm/asm-offsets.h>
 #include <asm/dis.h>
 #include <linux/uaccess.h>
 #include <asm/sclp.h>
 #include <asm/isc.h>
 #include <asm/gmap.h>
-#include <asm/switch_to.h>
 #include <asm/nmi.h>
 #include <asm/airq.h>
+#include <asm/tpi.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
+#include "pci.h"
 
 #define PFAULT_INIT 0x0600
 #define PFAULT_DONE 0x0680
@@ -81,8 +84,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 		struct esca_block *sca = vcpu->kvm->arch.sca;
 		union esca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+		union esca_sigp_ctrl new_val = {0}, old_val;
 
+		old_val = READ_ONCE(*sigp_ctrl);
 		new_val.scn = src_id;
 		new_val.c = 1;
 		old_val.c = 0;
@@ -93,8 +97,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 		struct bsca_block *sca = vcpu->kvm->arch.sca;
 		union bsca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl;
+		union bsca_sigp_ctrl new_val = {0}, old_val;
 
+		old_val = READ_ONCE(*sigp_ctrl);
 		new_val.scn = src_id;
 		new_val.c = 1;
 		old_val.c = 0;
@@ -114,8 +119,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 
 static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 {
-	int rc, expect;
-
 	if (!kvm_s390_use_sca_entries())
 		return;
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
@@ -124,21 +127,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 		struct esca_block *sca = vcpu->kvm->arch.sca;
 		union esca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union esca_sigp_ctrl old = *sigp_ctrl;
 
-		expect = old.value;
-		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+		WRITE_ONCE(sigp_ctrl->value, 0);
 	} else {
 		struct bsca_block *sca = vcpu->kvm->arch.sca;
 		union bsca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union bsca_sigp_ctrl old = *sigp_ctrl;
 
-		expect = old.value;
-		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+		WRITE_ONCE(sigp_ctrl->value, 0);
 	}
 	read_unlock(&vcpu->kvm->arch.sca_lock);
-	WARN_ON(rc != expect); /* cannot clear? */
 }
 
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
@@ -241,12 +239,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
 {
 	u64 word, _word;
 
+	word = READ_ONCE(gisa->u64.word[0]);
 	do {
-		word = READ_ONCE(gisa->u64.word[0]);
 		if ((u64)gisa != word >> 32)
 			return -EBUSY;
 		_word = (word & ~0xffUL) | iam;
-	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 
 	return 0;
 }
@@ -264,10 +262,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
 {
 	u64 word, _word;
 
+	word = READ_ONCE(gisa->u64.word[0]);
 	do {
-		word = READ_ONCE(gisa->u64.word[0]);
 		_word = word & ~(0xffUL << 24);
-	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 }
 
 /**
@@ -285,23 +283,18 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
 	u8 pending_mask, alert_mask;
 	u64 word, _word;
 
+	word = READ_ONCE(gi->origin->u64.word[0]);
 	do {
-		word = READ_ONCE(gi->origin->u64.word[0]);
 		alert_mask = READ_ONCE(gi->alert.mask);
 		pending_mask = (u8)(word >> 24) & alert_mask;
 		if (pending_mask)
 			return pending_mask;
 		_word = (word & ~0xffUL) | alert_mask;
-	} while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
 
 	return 0;
 }
 
-static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa)
-{
-	return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa;
-}
-
 static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
 {
 	set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
@@ -312,11 +305,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa)
 	return READ_ONCE(gisa->ipm);
 }
 
-static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
-{
-	clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
-}
-
 static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
 {
 	return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
@@ -588,9 +576,9 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
 
 	mci.val = mchk->mcic;
 	/* take care of lazy register loading */
-	save_fpu_regs();
+	kvm_s390_fpu_store(vcpu->run);
 	save_access_regs(vcpu->run->s.regs.acrs);
-	if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
+	if (cpu_has_gs() && vcpu->arch.gs_enabled)
 		save_gs_cb(current->thread.gs_cb);
 
 	/* Extended save area */
@@ -643,7 +631,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
 	rc |= put_guest_lc(vcpu, mci.val, (u64 __user *) __LC_MCCK_CODE);
 
 	/* Register-save areas */
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
 		rc |= write_guest_lc(vcpu, __LC_FPREGS_SAVE_AREA, fprs, 128);
 	} else {
@@ -652,7 +640,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
 	}
 	rc |= write_guest_lc(vcpu, __LC_GPREGS_SAVE_AREA,
 			     vcpu->run->s.regs.gprs, 128);
-	rc |= put_guest_lc(vcpu, current->thread.fpu.fpc,
+	rc |= put_guest_lc(vcpu, vcpu->run->s.regs.fpc,
 			   (u32 __user *) __LC_FP_CREG_SAVE_AREA);
 	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->todpr,
 			   (u32 __user *) __LC_TOD_PROGREG_SAVE_AREA);
@@ -702,7 +690,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 	/*
 	 * We indicate floating repressible conditions along with
 	 * other pending conditions. Channel Report Pending and Channel
-	 * Subsystem damage are the only two and and are indicated by
+	 * Subsystem damage are the only two and are indicated by
 	 * bits in mcic and masked in cr14.
 	 */
 	if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
@@ -961,8 +949,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
 	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
 				 (u64 *) __LC_PGM_LAST_BREAK);
-	rc |= put_guest_lc(vcpu, pgm_info.code,
-			   (u16 *)__LC_PGM_INT_CODE);
+	rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_CODE);
 	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
@@ -1035,7 +1022,7 @@ static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 	ext = fi->srv_signal;
-	/* only clear the event bit */
+	/* only clear the event bits */
 	fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
 	clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
 	spin_unlock(&fi->lock);
@@ -1045,7 +1032,7 @@ static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
 					 ext.ext_params, 0);
 
-	return write_sclp(vcpu, SCCB_EVENT_PENDING);
+	return write_sclp(vcpu, ext.ext_params & SCCB_EVENT_PENDING);
 }
 
 static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
@@ -1391,6 +1378,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc = 0;
+	bool delivered = false;
 	unsigned long irq_type;
 	unsigned long irqs;
 
@@ -1464,6 +1452,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 			WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
 			clear_bit(irq_type, &li->pending_irqs);
 		}
+		delivered |= !rc;
+	}
+
+	/*
+	 * We delivered at least one interrupt and modified the PC. Force a
+	 * singlestep event now.
+	 */
+	if (delivered && guestdbg_sstep_enabled(vcpu)) {
+		struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+		debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
 	}
 
 	set_intercept_indicators(vcpu);
@@ -2677,9 +2678,13 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		kvm_s390_clear_float_irqs(dev->kvm);
 		break;
 	case KVM_DEV_FLIC_APF_ENABLE:
+		if (kvm_is_ucontrol(dev->kvm))
+			return -EINVAL;
 		dev->kvm->arch.gmap->pfault_enabled = 1;
 		break;
 	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		if (kvm_is_ucontrol(dev->kvm))
+			return -EINVAL;
 		dev->kvm->arch.gmap->pfault_enabled = 0;
 		/*
 		 * Make sure no async faults are in transition when
@@ -2776,7 +2781,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
 
 	mmap_read_lock(kvm->mm);
 	get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
-			      &page, NULL, NULL);
+			      &page, NULL);
 	mmap_read_unlock(kvm->mm);
 	return page;
 }
@@ -2888,20 +2893,25 @@ int kvm_set_routing_entry(struct kvm *kvm,
 			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
-	u64 uaddr;
+	u64 uaddr_s, uaddr_i;
+	int idx;
 
 	switch (ue->type) {
 	/* we store the userspace addresses instead of the guest addresses */
 	case KVM_IRQ_ROUTING_S390_ADAPTER:
+		if (kvm_is_ucontrol(kvm))
+			return -EINVAL;
 		e->set = set_adapter_int;
-		uaddr =  gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
-		if (uaddr == -EFAULT)
-			return -EFAULT;
-		e->adapter.summary_addr = uaddr;
-		uaddr =  gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
-		if (uaddr == -EFAULT)
+
+		idx = srcu_read_lock(&kvm->srcu);
+		uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
+		uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
+		srcu_read_unlock(&kvm->srcu, idx);
+
+		if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i))
 			return -EFAULT;
-		e->adapter.ind_addr = uaddr;
+		e->adapter.summary_addr = uaddr_s;
+		e->adapter.ind_addr = uaddr_i;
 		e->adapter.summary_offset = ue->u.adapter.summary_offset;
 		e->adapter.ind_offset = ue->u.adapter.ind_offset;
 		e->adapter.adapter_id = ue->u.adapter.adapter_id;
@@ -3102,9 +3112,9 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
 static void process_gib_alert_list(void)
 {
 	struct kvm_s390_gisa_interrupt *gi;
+	u32 final, gisa_phys, origin = 0UL;
 	struct kvm_s390_gisa *gisa;
 	struct kvm *kvm;
-	u32 final, origin = 0UL;
 
 	do {
 		/*
@@ -3130,9 +3140,10 @@ static void process_gib_alert_list(void)
 		 * interruptions asap.
 		 */
 		while (origin & GISA_ADDR_MASK) {
-			gisa = (struct kvm_s390_gisa *)(u64)origin;
+			gisa_phys = origin;
+			gisa = phys_to_virt(gisa_phys);
 			origin = gisa->next_alert;
-			gisa->next_alert = (u32)(u64)gisa;
+			gisa->next_alert = gisa_phys;
 			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
 			gi = &kvm->arch.gisa_int;
 			if (hrtimer_active(&gi->timer))
@@ -3150,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm)
 	if (!gi->origin)
 		return;
 	gisa_clear_ipm(gi->origin);
-	VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+	VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin);
 }
 
 void kvm_s390_gisa_init(struct kvm *kvm)
@@ -3163,11 +3174,10 @@ void kvm_s390_gisa_init(struct kvm *kvm)
 	gi->alert.mask = 0;
 	spin_lock_init(&gi->alert.ref_lock);
 	gi->expires = 50 * 1000; /* 50 usec */
-	hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	gi->timer.function = gisa_vcpu_kicker;
+	hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
-	gi->origin->next_alert = (u32)(u64)gi->origin;
-	VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+	gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
+	VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin);
 }
 
 void kvm_s390_gisa_enable(struct kvm *kvm)
@@ -3200,14 +3210,15 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
 
 	if (!gi->origin)
 		return;
-	if (gi->alert.mask)
-		KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x",
-			  kvm, gi->alert.mask);
-	while (gisa_in_alert_list(gi->origin))
-		cpu_relax();
+	WARN(gi->alert.mask != 0x00,
+	     "unexpected non zero alert.mask 0x%02x",
+	     gi->alert.mask);
+	gi->alert.mask = 0x00;
+	if (gisa_set_iam(gi->origin, gi->alert.mask))
+		process_gib_alert_list();
 	hrtimer_cancel(&gi->timer);
 	gi->origin = NULL;
-	VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+	VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa);
 }
 
 void kvm_s390_gisa_disable(struct kvm *kvm)
@@ -3311,29 +3322,111 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
 
-static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
+static void aen_host_forward(unsigned long si)
+{
+	struct kvm_s390_gisa_interrupt *gi;
+	struct zpci_gaite *gaite;
+	struct kvm *kvm;
+
+	gaite = (struct zpci_gaite *)aift->gait +	/* NOTE(review): pointer is already typed, so */
+		(si * sizeof(struct zpci_gaite));	/* sizeof scales the index twice - confirm */
+	if (gaite->count == 0)
+		return;
+	if (gaite->aisb != 0)
+		set_bit_inv(gaite->aisbo, phys_to_virt(gaite->aisb));
+	/* Map si to its kvm; there is nothing more to forward if none is found. */
+	kvm = kvm_s390_pci_si_to_kvm(aift, si);
+	if (!kvm)
+		return;
+	gi = &kvm->arch.gisa_int;
+	/* Unless both simm and nimm are set for this ISC, mark it in the IPM and fire the timer now. */
+	if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
+	    !(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
+		gisa_set_ipm_gisc(gi->origin, gaite->gisc);
+		if (hrtimer_active(&gi->timer))
+			hrtimer_cancel(&gi->timer);
+		hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+		kvm->stat.aen_forward++;
+	}
+}
+
+static void aen_process_gait(u8 isc)
 {
+	bool found = false, first = true;
+	union zpci_sic_iib iib = {{0}};
+	unsigned long si, flags;
+
+	spin_lock_irqsave(&aift->gait_lock, flags);
+
+	if (!aift->gait) {
+		spin_unlock_irqrestore(&aift->gait_lock, flags);
+		return;
+	}
+
+	for (si = 0;;) {
+		/* Scan adapter summary indicator bit vector */
+		si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
+		if (si == -1UL) {
+			if (first || found) {
+				/* Re-enable interrupts. */
+				zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
+						  &iib);
+				first = found = false;
+			} else {
+				/* Interrupts on and all bits processed */
+				break;
+			}
+			found = false;
+			si = 0;
+			/* Scan again after re-enabling interrupts */
+			continue;
+		}
+		found = true;
+		aen_host_forward(si);
+	}
+
+	spin_unlock_irqrestore(&aift->gait_lock, flags);
+}
+
+static void gib_alert_irq_handler(struct airq_struct *airq,
+				  struct tpi_info *tpi_info)
+{
+	struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;
+
 	inc_irq_stat(IRQIO_GAL);
-	process_gib_alert_list();
+
+	if ((info->forward || info->error) &&
+	    IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+		aen_process_gait(info->isc);
+		if (info->aism != 0)
+			process_gib_alert_list();
+	} else {
+		process_gib_alert_list();
+	}
 }
 
 static struct airq_struct gib_alert_irq = {
 	.handler = gib_alert_irq_handler,
-	.lsi_ptr = &gib_alert_irq.lsi_mask,
 };
 
 void kvm_s390_gib_destroy(void)
 {
 	if (!gib)
 		return;
+	if (kvm_s390_pci_interp_allowed() && aift) {
+		mutex_lock(&aift->aift_lock);
+		kvm_s390_pci_aen_exit();
+		mutex_unlock(&aift->aift_lock);
+	}
 	chsc_sgib(0);
 	unregister_adapter_interrupt(&gib_alert_irq);
 	free_page((unsigned long)gib);
 	gib = NULL;
 }
 
-int kvm_s390_gib_init(u8 nisc)
+int __init kvm_s390_gib_init(u8 nisc)
 {
+	u32 gib_origin;
 	int rc = 0;
 
 	if (!css_general_characteristics.aiv) {
@@ -3353,9 +3446,12 @@ int kvm_s390_gib_init(u8 nisc)
 		rc = -EIO;
 		goto out_free_gib;
 	}
+	/* adapter interrupts used for AP (applicable here) don't use the LSI */
+	*gib_alert_irq.lsi_ptr = 0xff;
 
 	gib->nisc = nisc;
-	if (chsc_sgib((u32)(u64)gib)) {
+	gib_origin = virt_to_phys(gib);
+	if (chsc_sgib(gib_origin)) {
 		pr_err("Associating the GIB with the AIV facility failed\n");
 		free_page((unsigned long)gib);
 		gib = NULL;
@@ -3363,7 +3459,15 @@ int kvm_s390_gib_init(u8 nisc)
 		goto out_unreg_gal;
 	}
 
-	KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+	if (kvm_s390_pci_interp_allowed()) {
+		if (kvm_s390_pci_aen_init(nisc)) {
+			pr_err("Initializing AEN for PCI failed\n");
+			rc = -EIO;
+			goto out_unreg_gal;
+		}
+	}
+
+	KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc);
 	goto out;
 
 out_unreg_gal:
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
deleted file mode 100644
index 484608c71dd0..000000000000
--- a/arch/s390/kvm/irq.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * s390 irqchip routines
- *
- * Copyright IBM Corp. 2014
- *
- *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
- */
-#ifndef __KVM_IRQ_H
-#define __KVM_IRQ_H
-
-#include <linux/kvm_host.h>
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 1;
-}
-
-#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8fcb56141689..d5ad10791c25 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -23,6 +23,7 @@
 #include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/cpufeature.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
@@ -31,22 +32,27 @@
 #include <linux/sched/signal.h>
 #include <linux/string.h>
 #include <linux/pgtable.h>
+#include <linux/mmu_notifier.h>
 
+#include <asm/access-regs.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
+#include <asm/machine.h>
 #include <asm/stp.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/nmi.h>
-#include <asm/switch_to.h>
 #include <asm/isc.h>
 #include <asm/sclp.h>
 #include <asm/cpacf.h>
 #include <asm/timex.h>
+#include <asm/asm.h>
+#include <asm/fpu.h>
 #include <asm/ap.h>
 #include <asm/uv.h>
-#include <asm/fpu/api.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include "pci.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -63,7 +69,15 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	STATS_DESC_COUNTER(VM, inject_float_mchk),
 	STATS_DESC_COUNTER(VM, inject_pfault_done),
 	STATS_DESC_COUNTER(VM, inject_service_signal),
-	STATS_DESC_COUNTER(VM, inject_virtio)
+	STATS_DESC_COUNTER(VM, inject_virtio),
+	STATS_DESC_COUNTER(VM, aen_forward),
+	STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
+	STATS_DESC_COUNTER(VM, gmap_shadow_create),
+	STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
+	STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
+	STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
+	STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
+	STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
 };
 
 const struct kvm_stats_header kvm_vm_stats_header = {
@@ -122,6 +136,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, instruction_io_other),
 	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
 	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+	STATS_DESC_COUNTER(VCPU, instruction_lpswey),
 	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
 	STATS_DESC_COUNTER(VCPU, instruction_ptff),
 	STATS_DESC_COUNTER(VCPU, instruction_sck),
@@ -207,6 +222,14 @@ module_param(diag9c_forwarding_hz, uint, 0644);
 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 
 /*
+ * allow asynchronous deinit for protected guests; enable by default since
+ * the feature is opt-in anyway
+ */
+static int async_destroy = 1;
+module_param(async_destroy, int, 0444);
+MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
+
+/*
  * For now we handle at most 16 double words as this is what the s390 base
  * kernel handles and stores in the prefix page. If we ever need to go beyond
  * this, this requires changes to code, but the external uapi can stay.
@@ -245,17 +268,6 @@ debug_info_t *kvm_s390_dbf;
 debug_info_t *kvm_s390_dbf_uv;
 
 /* Section: not file related */
-int kvm_arch_hardware_enable(void)
-{
-	/* every s390 is virtualization enabled ;-) */
-	return 0;
-}
-
-int kvm_arch_check_processor_compat(void *opaque)
-{
-	return 0;
-}
-
 /* forward declarations */
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 			      unsigned long end);
@@ -318,25 +330,6 @@ static struct notifier_block kvm_clock_notifier = {
 	.notifier_call = kvm_clock_sync,
 };
 
-int kvm_arch_hardware_setup(void *opaque)
-{
-	gmap_notifier.notifier_call = kvm_gmap_notifier;
-	gmap_register_pte_notifier(&gmap_notifier);
-	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
-	gmap_register_pte_notifier(&vsie_gmap_notifier);
-	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
-				       &kvm_clock_notifier);
-	return 0;
-}
-
-void kvm_arch_hardware_unsetup(void)
-{
-	gmap_unregister_pte_notifier(&gmap_notifier);
-	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
-	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
-					 &kvm_clock_notifier);
-}
-
 static void allow_cpu_feat(unsigned long nr)
 {
 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
@@ -351,30 +344,48 @@ static inline int plo_test_bit(unsigned char nr)
 		"	lgr	0,%[function]\n"
 		/* Parameter registers are ignored for "test bit" */
 		"	plo	0,0,0,0(0)\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (cc)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc)
 		: [function] "d" (function)
+		: CC_CLOBBER_LIST("0"));
+	return CC_TRANSFORM(cc) == 0;
+}
+
+static __always_inline void pfcr_query(u8 (*query)[16])
+{
+	asm volatile(
+		"	lghi	0,0\n"
+		"	.insn   rsy,0xeb0000000016,0,0,%[query]\n"
+		: [query] "=QS" (*query)
+		:
 		: "cc", "0");
-	return cc == 0;
 }
 
-static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
+static __always_inline void __sortl_query(u8 (*query)[32])
 {
 	asm volatile(
 		"	lghi	0,0\n"
-		"	lgr	1,%[query]\n"
+		"	la	1,%[query]\n"
 		/* Parameter registers are ignored */
-		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
+		"	.insn	rre,0xb9380000,2,4\n"
+		: [query] "=R" (*query)
 		:
-		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
-		: "cc", "memory", "0", "1");
+		: "cc", "0", "1");
 }
 
-#define INSN_SORTL 0xb938
-#define INSN_DFLTCC 0xb939
+static __always_inline void __dfltcc_query(u8 (*query)[32])
+{
+	asm volatile(
+		"	lghi	0,0\n"
+		"	la	1,%[query]\n"
+		/* Parameter registers are ignored */
+		"	.insn	rrf,0xb9390000,2,4,6,0\n"
+		: [query] "=R" (*query)
+		:
+		: "cc", "0", "1");
+}
 
-static void kvm_s390_cpu_feat_init(void)
+static void __init kvm_s390_cpu_feat_init(void)
 {
 	int i;
 
@@ -426,18 +437,21 @@ static void kvm_s390_cpu_feat_init(void)
 			      kvm_s390_available_subfunc.kdsa);
 
 	if (test_facility(150)) /* SORTL */
-		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+		__sortl_query(&kvm_s390_available_subfunc.sortl);
 
 	if (test_facility(151)) /* DFLTCC */
-		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+		__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
+
+	if (test_facility(201))	/* PFCR */
+		pfcr_query(&kvm_s390_available_subfunc.pfcr);
 
-	if (MACHINE_HAS_ESOP)
+	if (machine_has_esop())
 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 	/*
 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 	 */
-	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+	if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
 	    !test_facility(3) || !nested)
 		return;
 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
@@ -477,7 +491,7 @@ static void kvm_s390_cpu_feat_init(void)
 	 */
 }
 
-int kvm_arch_init(void *opaque)
+static int __init __kvm_s390_init(void)
 {
 	int rc = -ENOMEM;
 
@@ -487,11 +501,11 @@ int kvm_arch_init(void *opaque)
 
 	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 	if (!kvm_s390_dbf_uv)
-		goto out;
+		goto err_kvm_uv;
 
 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
-		goto out;
+		goto err_debug_view;
 
 	kvm_s390_cpu_feat_init();
 
@@ -499,23 +513,52 @@ int kvm_arch_init(void *opaque)
 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 	if (rc) {
 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
-		goto out;
+		goto err_flic;
+	}
+
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+		rc = kvm_s390_pci_init();
+		if (rc) {
+			pr_err("Unable to allocate AIFT for PCI\n");
+			goto err_pci;
+		}
 	}
 
 	rc = kvm_s390_gib_init(GAL_ISC);
 	if (rc)
-		goto out;
+		goto err_gib;
+
+	gmap_notifier.notifier_call = kvm_gmap_notifier;
+	gmap_register_pte_notifier(&gmap_notifier);
+	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+	gmap_register_pte_notifier(&vsie_gmap_notifier);
+	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+				       &kvm_clock_notifier);
 
 	return 0;
 
-out:
-	kvm_arch_exit();
+err_gib:
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+		kvm_s390_pci_exit();
+err_pci:
+err_flic:
+err_debug_view:
+	debug_unregister(kvm_s390_dbf_uv);
+err_kvm_uv:
+	debug_unregister(kvm_s390_dbf);
 	return rc;
 }
 
-void kvm_arch_exit(void)
+static void __kvm_s390_exit(void)
 {
+	gmap_unregister_pte_notifier(&gmap_notifier);
+	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
+	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+					 &kvm_clock_notifier);
+
 	kvm_s390_gib_destroy();
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+		kvm_s390_pci_exit();
 	debug_unregister(kvm_s390_dbf);
 	debug_unregister(kvm_s390_dbf_uv);
 }
@@ -546,7 +589,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_S390_CSS_SUPPORT:
 	case KVM_CAP_IOEVENTFD:
-	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_S390_IRQCHIP:
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
@@ -563,7 +605,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_VCPU_RESETS:
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_S390_DIAG318:
-	case KVM_CAP_S390_MEM_OP_EXTENSION:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
@@ -571,12 +613,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_S390_HPAGE_1M:
 		r = 0;
-		if (hpage && !kvm_is_ucontrol(kvm))
+		if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
 			r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
 		r = MEM_OP_MAX_SIZE;
 		break;
+	case KVM_CAP_S390_MEM_OP_EXTENSION:
+		/*
+		 * Flag bits indicating which extensions are supported.
+		 * If r > 0, the base extension must also be supported/indicated,
+		 * in order to maintain backwards compatibility.
+		 */
+		r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
+		    KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
+		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 	case KVM_CAP_MAX_VCPU_ID:
@@ -589,10 +640,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			r = min_t(unsigned int, num_online_cpus(), r);
 		break;
 	case KVM_CAP_S390_COW:
-		r = MACHINE_HAS_ESOP;
+		r = machine_has_esop();
 		break;
 	case KVM_CAP_S390_VECTOR_REGISTERS:
-		r = MACHINE_HAS_VX;
+		r = test_facility(129);
 		break;
 	case KVM_CAP_S390_RI:
 		r = test_facility(64);
@@ -603,9 +654,38 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_BPB:
 		r = test_facility(82);
 		break;
+	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
+		r = async_destroy && is_prot_virt_host();
+		break;
 	case KVM_CAP_S390_PROTECTED:
 		r = is_prot_virt_host();
 		break;
+	case KVM_CAP_S390_PROTECTED_DUMP: {
+		u64 pv_cmds_dump[] = {
+			BIT_UVC_CMD_DUMP_INIT,
+			BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
+			BIT_UVC_CMD_DUMP_CPU,
+			BIT_UVC_CMD_DUMP_COMPLETE,
+		};
+		int i;
+
+		r = is_prot_virt_host();
+
+		for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
+			if (!test_bit_inv(pv_cmds_dump[i],
+					  (unsigned long *)&uv_info.inst_calls_list)) {
+				r = 0;
+				break;
+			}
+		}
+		break;
+	}
+	case KVM_CAP_S390_ZPCI_OP:
+		r = kvm_s390_pci_interp_allowed();
+		break;
+	case KVM_CAP_S390_CPU_TOPOLOGY:
+		r = test_facility(11);
+		break;
 	default:
 		r = 0;
 	}
@@ -712,7 +792,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		mutex_lock(&kvm->lock);
 		if (kvm->created_vcpus) {
 			r = -EBUSY;
-		} else if (MACHINE_HAS_VX) {
+		} else if (cpu_has_vx()) {
 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
 			set_kvm_facility(kvm->arch.model.fac_list, 129);
 			if (test_facility(134)) {
@@ -735,6 +815,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 				set_kvm_facility(kvm->arch.model.fac_mask, 192);
 				set_kvm_facility(kvm->arch.model.fac_list, 192);
 			}
+			if (test_facility(198)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 198);
+				set_kvm_facility(kvm->arch.model.fac_list, 198);
+			}
+			if (test_facility(199)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 199);
+				set_kvm_facility(kvm->arch.model.fac_list, 199);
+			}
 			r = 0;
 		} else
 			r = -EINVAL;
@@ -817,6 +905,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		icpt_operexc_on_all_vcpus(kvm);
 		r = 0;
 		break;
+	case KVM_CAP_S390_CPU_TOPOLOGY:
+		r = -EINVAL;
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else if (test_facility(11)) {
+			set_kvm_facility(kvm->arch.model.fac_mask, 11);
+			set_kvm_facility(kvm->arch.model.fac_list, 11);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
+			 r ? "(not available)" : "(success)");
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -920,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		}
 		mutex_unlock(&kvm->lock);
 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
-		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+		VM_EVENT(kvm, 3, "New guest asce: 0x%p",
 			 (void *) kvm->arch.gmap->asce);
 		break;
 	}
@@ -1019,6 +1121,42 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 	return 0;
 }
 
+static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
+{
+	/* Only set the ECB bits after guest requests zPCI interpretation */
+	if (!vcpu->kvm->arch.use_zpci_interp)
+		return;
+
+	vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
+	vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
+}
+
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	if (!kvm_s390_pci_interp_allowed())
+		return;
+
+	/*
+	 * If host is configured for PCI and the necessary facilities are
+	 * available, turn on interpretation for the life of this guest
+	 */
+	kvm->arch.use_zpci_interp = 1;
+
+	kvm_s390_vcpu_block_all(kvm);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_s390_vcpu_pci_setup(vcpu);
+		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+	}
+
+	kvm_s390_vcpu_unblock_all(kvm);
+}
+
 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 {
 	unsigned long cx;
@@ -1118,6 +1256,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
 	return 0;
 }
 
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+
 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	struct kvm_s390_vm_tod_clock gtod;
@@ -1127,7 +1267,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 
 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
 		return -EINVAL;
-	kvm_s390_set_tod_clock(kvm, &gtod);
+	__kvm_s390_set_tod_clock(kvm, &gtod);
 
 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 		gtod.epoch_idx, gtod.tod);
@@ -1158,7 +1298,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 			   sizeof(gtod.tod)))
 		return -EFAULT;
 
-	kvm_s390_set_tod_clock(kvm, &gtod);
+	__kvm_s390_set_tod_clock(kvm, &gtod);
 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
 	return 0;
 }
@@ -1170,6 +1310,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (attr->flags)
 		return -EINVAL;
 
+	mutex_lock(&kvm->lock);
+	/*
+	 * For protected guests, the TOD is managed by the ultravisor, so trying
+	 * to change it will never bring the expected results.
+	 */
+	if (kvm_s390_pv_is_protected(kvm)) {
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
 	switch (attr->attr) {
 	case KVM_S390_VM_TOD_EXT:
 		ret = kvm_s390_set_tod_ext(kvm, attr);
@@ -1184,6 +1334,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 		ret = -ENXIO;
 		break;
 	}
+
+out_unlock:
+	mutex_unlock(&kvm->lock);
 	return ret;
 }
 
@@ -1414,6 +1567,42 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+	VM_EVENT(kvm, 3, "SET: guest PFCR   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
+
+	return 0;
+}
+
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK	\
+(						\
+	((struct kvm_s390_vm_cpu_uv_feat){	\
+		.ap = 1,			\
+		.ap_intr = 1,			\
+	})					\
+	.feat					\
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+	unsigned long data, filter;
+
+	filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+	if (get_user(data, &ptr->feat))
+		return -EFAULT;
+	if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (kvm->created_vcpus) {
+		mutex_unlock(&kvm->lock);
+		return -EBUSY;
+	}
+	kvm->arch.model.uv_feat_guest.feat = data;
+	mutex_unlock(&kvm->lock);
+
+	VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
 
 	return 0;
 }
@@ -1432,6 +1621,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+		ret = kvm_s390_set_uv_feat(kvm, attr);
+		break;
 	}
 	return ret;
 }
@@ -1592,6 +1784,9 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
 		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+	VM_EVENT(kvm, 3, "GET: guest PFCR   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
+		 ((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
 
 	return 0;
 }
@@ -1660,6 +1855,36 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
 		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
+	VM_EVENT(kvm, 3, "GET: host  PFCR   subfunc 0x%16.16lx.%16.16lx",
+		 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+		 ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
+
+	return 0;
+}
+
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+	unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+	if (put_user(feat, &dst->feat))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+	return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+	unsigned long feat;
+
+	BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+	/* Report only the indications covered by the guest mask (ap, ap_intr). */
+	feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+	if (put_user(feat, &dst->feat))
+		return -EFAULT;
+	VM_EVENT(kvm, 3, "GET: host UV-feat: 0x%16.16lx", feat);
 
 	return 0;
 }
@@ -1687,10 +1912,67 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+		ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+		break;
+	case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+		ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+		break;
 	}
 	return ret;
 }
 
+/**
+ * kvm_s390_update_topology_change_report - update CPU topology change report
+ * @kvm: guest KVM description
+ * @val: set or clear the MTCR bit
+ *
+ * Updates the Multiprocessor Topology-Change-Report bit to signal
+ * the guest with a topology change.
+ * This is only relevant if the topology facility is present.
+ *
+ * The SCA version, bsca or esca, doesn't matter as offset is the same.
+ */
+static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
+{
+	union sca_utility new, old;
+	struct bsca_block *sca;
+
+	read_lock(&kvm->arch.sca_lock);
+	sca = kvm->arch.sca;
+	old = READ_ONCE(sca->utility);
+	do {
+		new = old;
+		new.mtcr = val;
+	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
+	read_unlock(&kvm->arch.sca_lock);
+}
+
+static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
+					       struct kvm_device_attr *attr)
+{
+	if (!test_kvm_facility(kvm, 11))
+		return -ENXIO;
+
+	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
+	return 0;
+}
+
+static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
+					       struct kvm_device_attr *attr)
+{
+	u8 topo;
+
+	if (!test_kvm_facility(kvm, 11))
+		return -ENXIO;
+
+	read_lock(&kvm->arch.sca_lock);
+	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
+	read_unlock(&kvm->arch.sca_lock);
+
+	return put_user(topo, (u8 __user *)attr->addr);
+}
+
 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret;
@@ -1711,6 +1993,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MIGRATION:
 		ret = kvm_s390_vm_set_migration(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = kvm_s390_set_topo_change_indication(kvm, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -1736,6 +2021,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MIGRATION:
 		ret = kvm_s390_vm_get_migration(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = kvm_s390_get_topo_change_indication(kvm, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -1782,6 +2070,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 		case KVM_S390_VM_CPU_MACHINE_FEAT:
 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+		case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+		case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
 			ret = 0;
 			break;
 		default:
@@ -1809,6 +2099,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MIGRATION:
 		ret = 0;
 		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -1817,7 +2110,7 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	return ret;
 }
 
-static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
 	uint8_t *keys;
 	uint64_t hva;
@@ -1865,7 +2158,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 	return r;
 }
 
-static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
 	uint8_t *keys;
 	uint64_t hva;
@@ -1983,6 +2276,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
 		ofs = 0;
 	}
+
+	if (cur_gfn < ms->base_gfn)
+		ofs = 0;
+
 	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
 	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
 		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
@@ -2166,12 +2463,25 @@ out:
 	return r;
 }
 
-static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+/**
+ * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
+ * non-protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Does not stop in case of error, tries to convert as many
+ * CPUs as possible. In case of error, the RC and RRC of the first error are
+ * returned.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
 	struct kvm_vcpu *vcpu;
-	u16 rc, rrc;
-	int ret = 0;
 	unsigned long i;
+	u16 _rc, _rrc;
+	int ret = 0;
 
 	/*
 	 * We ignore failures and try to destroy as many CPUs as possible.
@@ -2183,9 +2493,9 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
 	 */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		mutex_lock(&vcpu->mutex);
-		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
-			*rcp = rc;
-			*rrcp = rrc;
+		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
+			*rc = _rc;
+			*rrc = _rrc;
 			ret = -EIO;
 		}
 		mutex_unlock(&vcpu->mutex);
@@ -2196,6 +2506,17 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
 	return ret;
 }
 
+/**
+ * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
+ * to protected.
+ * @kvm: the VM whose non-protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Tries to undo the conversion in case of error.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
 	unsigned long i;
@@ -2205,7 +2526,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
 	struct kvm_vcpu *vcpu;
 
 	/* Disable the GISA if the ultravisor does not support AIV. */
-	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+	if (!uv_has_feature(BIT_UV_FEAT_AIV))
 		kvm_s390_gisa_disable(kvm);
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -2220,11 +2541,124 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
 	return r;
 }
 
+/*
+ * Here we provide user space with a direct interface to query UV
+ * related data like UV maxima and available features as well as
+ * feature specific data.
+ *
+ * To facilitate future extension of the data structures we'll try to
+ * write data up to the maximum requested length.
+ */
+static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
+{
+	ssize_t len_min;
+
+	switch (info->header.id) {
+	case KVM_PV_INFO_VM: {
+		len_min =  sizeof(info->header) + sizeof(info->vm);
+
+		if (info->header.len_max < len_min)
+			return -EINVAL;
+
+		memcpy(info->vm.inst_calls_list,
+		       uv_info.inst_calls_list,
+		       sizeof(uv_info.inst_calls_list));
+
+		/* It's max cpuid not max cpus, so it's off by one */
+		info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
+		info->vm.max_guests = uv_info.max_num_sec_conf;
+		info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
+		info->vm.feature_indication = uv_info.uv_feature_indications;
+
+		return len_min;
+	}
+	case KVM_PV_INFO_DUMP: {
+		len_min =  sizeof(info->header) + sizeof(info->dump);
+
+		if (info->header.len_max < len_min)
+			return -EINVAL;
+
+		info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
+		info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
+		info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
+		return len_min;
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
+			   struct kvm_s390_pv_dmp dmp)
+{
+	int r = -EINVAL;
+	void __user *result_buff = (void __user *)dmp.buff_addr;
+
+	switch (dmp.subcmd) {
+	case KVM_PV_DUMP_INIT: {
+		if (kvm->arch.pv.dumping)
+			break;
+
+		/*
+		 * Block SIE entry as concurrent dump UVCs could lead
+		 * to validities.
+		 */
+		kvm_s390_vcpu_block_all(kvm);
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
+			     cmd->rc, cmd->rrc);
+		if (!r) {
+			kvm->arch.pv.dumping = true;
+		} else {
+			kvm_s390_vcpu_unblock_all(kvm);
+			r = -EINVAL;
+		}
+		break;
+	}
+	case KVM_PV_DUMP_CONFIG_STOR_STATE: {
+		if (!kvm->arch.pv.dumping)
+			break;
+
+		/*
+		 * gaddr is an output parameter since we might stop
+		 * early. As dmp will be copied back in our caller, we
+		 * don't need to do it ourselves.
+		 */
+		r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
+						&cmd->rc, &cmd->rrc);
+		break;
+	}
+	case KVM_PV_DUMP_COMPLETE: {
+		if (!kvm->arch.pv.dumping)
+			break;
+
+		r = -EINVAL;
+		if (dmp.buff_len < uv_info.conf_dump_finalize_len)
+			break;
+
+		r = kvm_s390_pv_dump_complete(kvm, result_buff,
+					      &cmd->rc, &cmd->rrc);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+		break;
+	}
+
+	return r;
+}
+
 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 {
+	const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
+	void __user *argp = (void __user *)cmd->data;
 	int r = 0;
 	u16 dummy;
-	void __user *argp = (void __user *)cmd->data;
+
+	if (need_lock)
+		mutex_lock(&kvm->lock);
 
 	switch (cmd->cmd) {
 	case KVM_PV_ENABLE: {
@@ -2240,9 +2674,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		if (r)
 			break;
 
-		mmap_write_lock(current->mm);
-		r = gmap_mark_unmergeable();
-		mmap_write_unlock(current->mm);
+		mmap_write_lock(kvm->mm);
+		r = gmap_helper_disable_cow_sharing();
+		mmap_write_unlock(kvm->mm);
 		if (r)
 			break;
 
@@ -2258,6 +2692,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
 		break;
 	}
+	case KVM_PV_ASYNC_CLEANUP_PREPARE:
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
+			break;
+
+		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+		/*
+		 * If a CPU could not be destroyed, destroy VM will also fail.
+		 * There is no point in trying to destroy it. Instead return
+		 * the rc and rrc from the first CPU that failed destroying.
+		 */
+		if (r)
+			break;
+		r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
+
+		/* no need to block service interrupts any more */
+		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	case KVM_PV_ASYNC_CLEANUP_PERFORM:
+		r = -EINVAL;
+		if (!async_destroy)
+			break;
+		/* kvm->lock must not be held; this is asserted inside the function. */
+		r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
+		break;
 	case KVM_PV_DISABLE: {
 		r = -EINVAL;
 		if (!kvm_s390_pv_is_protected(kvm))
@@ -2271,7 +2730,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		 */
 		if (r)
 			break;
-		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+		r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
 
 		/* no need to block service interrupts any more */
 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
@@ -2356,47 +2815,104 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 			     cmd->rc, cmd->rrc);
 		break;
 	}
+	case KVM_PV_INFO: {
+		struct kvm_s390_pv_info info = {};
+		ssize_t data_len;
+
+		/*
+		 * No need to check the VM protection here.
+		 *
+		 * Maybe user space wants to query some of the data
+		 * when the VM is still unprotected. If we see the
+		 * need to fence a new data command we can still
+		 * return an error in the info handler.
+		 */
+
+		r = -EFAULT;
+		if (copy_from_user(&info, argp, sizeof(info.header)))
+			break;
+
+		r = -EINVAL;
+		if (info.header.len_max < sizeof(info.header))
+			break;
+
+		data_len = kvm_s390_handle_pv_info(&info);
+		if (data_len < 0) {
+			r = data_len;
+			break;
+		}
+		/*
+		 * If a data command struct is extended (multiple
+		 * times) this can be used to determine how much of it
+		 * is valid.
+		 */
+		info.header.len_written = data_len;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &info, data_len))
+			break;
+
+		r = 0;
+		break;
+	}
+	case KVM_PV_DUMP: {
+		struct kvm_s390_pv_dmp dmp;
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&dmp, argp, sizeof(dmp)))
+			break;
+
+		r = kvm_s390_pv_dmp(kvm, cmd, dmp);
+		if (r)
+			break;
+
+		if (copy_to_user(argp, &dmp, sizeof(dmp))) {
+			r = -EFAULT;
+			break;
+		}
+
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
-	return r;
-}
+	if (need_lock)
+		mutex_unlock(&kvm->lock);
 
-static bool access_key_invalid(u8 access_key)
-{
-	return access_key > 0xf;
+	return r;
 }
 
-static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
 {
-	void __user *uaddr = (void __user *)mop->buf;
-	u64 supported_flags;
-	void *tmpbuf = NULL;
-	int r, srcu_idx;
-
-	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
-			  | KVM_S390_MEMOP_F_CHECK_ONLY;
 	if (mop->flags & ~supported_flags || !mop->size)
 		return -EINVAL;
 	if (mop->size > MEM_OP_MAX_SIZE)
 		return -E2BIG;
-	/*
-	 * This is technically a heuristic only, if the kvm->lock is not
-	 * taken, it is not guaranteed that the vm is/remains non-protected.
-	 * This is ok from a kernel perspective, wrongdoing is detected
-	 * on the access, -EFAULT is returned and the vm may crash the
-	 * next time it accesses the memory in question.
-	 * There is no sane usecase to do switching and a memop on two
-	 * different CPUs at the same time.
-	 */
-	if (kvm_s390_pv_get_handle(kvm))
-		return -EINVAL;
 	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
-		if (access_key_invalid(mop->key))
+		if (mop->key > 0xf)
 			return -EINVAL;
 	} else {
 		mop->key = 0;
 	}
+	return 0;
+}
+
+static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	enum gacc_mode acc_mode;
+	void *tmpbuf = NULL;
+	int r, srcu_idx;
+
+	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
+					KVM_S390_MEMOP_F_CHECK_ONLY);
+	if (r)
+		return r;
+
 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
 		tmpbuf = vmalloc(mop->size);
 		if (!tmpbuf)
@@ -2405,40 +2921,30 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 
-	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+	if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
 		r = PGM_ADDRESSING;
 		goto out_unlock;
 	}
 
-	switch (mop->op) {
-	case KVM_S390_MEMOP_ABSOLUTE_READ: {
-		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
-			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
-		} else {
-			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
-						      mop->size, GACC_FETCH, mop->key);
-			if (r == 0) {
-				if (copy_to_user(uaddr, tmpbuf, mop->size))
-					r = -EFAULT;
-			}
-		}
-		break;
+	acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
+	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+		r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
+		goto out_unlock;
 	}
-	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
-		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
-			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
-		} else {
-			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
-				r = -EFAULT;
-				break;
-			}
-			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
-						      mop->size, GACC_STORE, mop->key);
+	if (acc_mode == GACC_FETCH) {
+		r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+					      mop->size, GACC_FETCH, mop->key);
+		if (r)
+			goto out_unlock;
+		if (copy_to_user(uaddr, tmpbuf, mop->size))
+			r = -EFAULT;
+	} else {
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			goto out_unlock;
 		}
-		break;
-	}
-	default:
-		r = -EINVAL;
+		r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+					      mop->size, GACC_STORE, mop->key);
 	}
 
 out_unlock:
@@ -2448,8 +2954,76 @@ out_unlock:
 	return r;
 }
 
-long kvm_arch_vm_ioctl(struct file *filp,
-		       unsigned int ioctl, unsigned long arg)
+static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void __user *old_addr = (void __user *)mop->old_addr;
+	union {
+		__uint128_t quad;
+		char raw[sizeof(__uint128_t)];
+	} old = { .quad = 0}, new = { .quad = 0 };
+	unsigned int off_in_quad = sizeof(new) - mop->size;
+	int r, srcu_idx;
+	bool success;
+
+	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
+	if (r)
+		return r;
+	/*
+	 * This validates off_in_quad. Checking that size is a power
+	 * of two is not necessary, as cmpxchg_guest_abs_with_key
+	 * takes care of that.
+	 */
+	if (mop->size > sizeof(new))
+		return -EINVAL;
+	if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+		return -EFAULT;
+	if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+		return -EFAULT;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
+		r = PGM_ADDRESSING;
+		goto out_unlock;
+	}
+
+	r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
+				       new.quad, mop->key, &success);
+	if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+		r = -EFAULT;
+
+out_unlock:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return r;
+}
+
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+	/*
+	 * This is technically a heuristic only, if the kvm->lock is not
+	 * taken, it is not guaranteed that the vm is/remains non-protected.
+	 * This is ok from a kernel perspective, wrongdoing is detected
+	 * on the access, -EFAULT is returned and the vm may crash the
+	 * next time it accesses the memory in question.
+	 * There is no sane usecase to do switching and a memop on two
+	 * different CPUs at the same time.
+	 */
+	if (kvm_s390_pv_get_handle(kvm))
+		return -EINVAL;
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_ABSOLUTE_READ:
+	case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+		return kvm_s390_vm_mem_op_abs(kvm, mop);
+	case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+		return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
@@ -2467,14 +3041,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_CREATE_IRQCHIP: {
-		struct kvm_irq_routing_entry routing;
-
 		r = -EINVAL;
-		if (kvm->arch.use_irqchip) {
-			/* Set up dummy routing. */
-			memset(&routing, 0, sizeof(routing));
-			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
-		}
+		if (kvm->arch.use_irqchip)
+			r = 0;
 		break;
 	}
 	case KVM_SET_DEVICE_ATTR: {
@@ -2563,9 +3132,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EINVAL;
 			break;
 		}
-		mutex_lock(&kvm->lock);
+		/* must be called without kvm->lock */
 		r = kvm_s390_handle_pv(kvm, &args);
-		mutex_unlock(&kvm->lock);
 		if (copy_to_user(argp, &args, sizeof(args))) {
 			r = -EFAULT;
 			break;
@@ -2581,6 +3149,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_S390_ZPCI_OP: {
+		struct kvm_s390_zpci_op args;
+
+		r = -EINVAL;
+		if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+			break;
+		if (copy_from_user(&args, argp, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		r = kvm_s390_pci_zpci_op(kvm, &args);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -2610,7 +3191,7 @@ static int kvm_s390_apxa_installed(void)
  */
 static void kvm_s390_set_crycb_format(struct kvm *kvm)
 {
-	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+	kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
 
 	/* Clear the CRYCB format bits - i.e., set format 0 by default */
 	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
@@ -2742,6 +3323,14 @@ static void sca_dispose(struct kvm *kvm)
 	kvm->arch.sca = NULL;
 }
 
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+		kvm_s390_pci_clear_list(kvm);
+
+	__kvm_arch_free_vm(kvm);
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
@@ -2811,7 +3400,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	/* we emulate STHYI in kvm */
 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
 	set_kvm_facility(kvm->arch.model.fac_list, 74);
-	if (MACHINE_HAS_TLB_GUEST) {
+	if (machine_has_tlb_guest()) {
 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
 		set_kvm_facility(kvm->arch.model.fac_list, 147);
 	}
@@ -2822,8 +3411,17 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
+	kvm->arch.model.uv_feat_guest.feat = 0;
+
 	kvm_s390_crypto_init(kvm);
 
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+		mutex_lock(&kvm->lock);
+		kvm_s390_pci_init_list(kvm);
+		kvm_s390_vcpu_pci_enable_interp(kvm);
+		mutex_unlock(&kvm->lock);
+	}
+
 	mutex_init(&kvm->arch.float_int.ais_lock);
 	spin_lock_init(&kvm->arch.float_int.lock);
 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -2835,8 +3433,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
 
 	if (type & KVM_VM_S390_UCONTROL) {
+		struct kvm_userspace_memory_region2 fake_memslot = {
+			.slot = KVM_S390_UCONTROL_MEMSLOT,
+			.guest_phys_addr = 0,
+			.userspace_addr = 0,
+			.memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+			.flags = 0,
+		};
+
 		kvm->arch.gmap = NULL;
 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+		/* one flat fake memslot covering the whole address-space */
+		mutex_lock(&kvm->slots_lock);
+		KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+		mutex_unlock(&kvm->slots_lock);
 	} else {
 		if (sclp.hamax == U64_MAX)
 			kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -2856,7 +3466,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_s390_vsie_init(kvm);
 	if (use_gisa)
 		kvm_s390_gisa_init(kvm);
-	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
+	kvm->arch.pv.set_aside = NULL;
+	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
 
 	return 0;
 out_err:
@@ -2877,6 +3489,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_clear_async_pf_completion_queue(vcpu);
 	if (!kvm_is_ucontrol(vcpu->kvm))
 		sca_del_vcpu(vcpu);
+	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
 
 	if (kvm_is_ucontrol(vcpu->kvm))
 		gmap_remove(vcpu->arch.gmap);
@@ -2899,11 +3512,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	/*
 	 * We are already at the end of life and kvm->lock is not taken.
 	 * This is ok as the file descriptor is closed by now and nobody
-	 * can mess with the pv state. To avoid lockdep_assert_held from
-	 * complaining we do not use kvm_s390_pv_is_protected.
+	 * can mess with the pv state.
 	 */
-	if (kvm_s390_pv_get_handle(kvm))
-		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+	kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
+	/*
+	 * Remove the mmu notifier only when the whole KVM VM is torn down,
+	 * and only if one was registered to begin with. If the VM is
+	 * currently not protected, but has previously been protected,
+	 * then it's possible that the notifier is still registered.
+	 */
+	if (kvm->arch.pv.mmu_notifier.ops)
+		mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
+
 	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)kvm->arch.sie_page2);
 	if (!kvm_is_ucontrol(kvm))
@@ -2911,7 +3531,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_vsie_destroy(kvm);
-	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
 }
 
 /* Section: vcpu related */
@@ -2947,28 +3567,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu)
 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
 {
 	if (!kvm_s390_use_sca_entries()) {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
 
 		/* we still need the basic sca for the ipte control */
-		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys;
 		return;
 	}
 	read_lock(&vcpu->kvm->arch.sca_lock);
 	if (vcpu->kvm->arch.use_esca) {
 		struct esca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(sca);
 
-		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
-		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
+		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
 	} else {
 		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(sca);
 
-		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
-		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys;
 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
 	}
 	read_unlock(&vcpu->kvm->arch.sca_lock);
@@ -2999,6 +3621,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	unsigned long vcpu_idx;
 	u32 scaol, scaoh;
+	phys_addr_t new_sca_phys;
 
 	if (kvm->arch.use_esca)
 		return 0;
@@ -3007,8 +3630,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
 	if (!new_sca)
 		return -ENOMEM;
 
-	scaoh = (u32)((u64)(new_sca) >> 32);
-	scaol = (u32)(u64)(new_sca) & ~0x3fU;
+	new_sca_phys = virt_to_phys(new_sca);
+	scaoh = new_sca_phys >> 32;
+	scaol = new_sca_phys & ESCA_SCAOL_MASK;
 
 	kvm_s390_vcpu_block_all(kvm);
 	write_lock(&kvm->arch.sca_lock);
@@ -3028,7 +3652,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
 
 	free_page((unsigned long)old_sca);
 
-	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+	VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
 		 old_sca, kvm->arch.sca);
 	return 0;
 }
@@ -3047,9 +3671,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
 	if (!sclp.has_esca || !sclp.has_64bscao)
 		return false;
 
-	mutex_lock(&kvm->lock);
 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
-	mutex_unlock(&kvm->lock);
 
 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
 }
@@ -3144,7 +3766,6 @@ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 
-	gmap_enable(vcpu->arch.enabled_gmap);
 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
 		__start_cpu_timer_accounting(vcpu);
@@ -3157,8 +3778,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
 		__stop_cpu_timer_accounting(vcpu);
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
-	vcpu->arch.enabled_gmap = gmap_get_enabled();
-	gmap_disable(vcpu->arch.enabled_gmap);
 
 }
 
@@ -3176,8 +3795,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 	}
 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
-	/* make vcpu_load load the right gmap on the first trigger */
-	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
 }
 
 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
@@ -3199,6 +3816,13 @@ static bool kvm_has_pckmo_ecc(struct kvm *kvm)
 
 }
 
+static bool kvm_has_pckmo_hmac(struct kvm *kvm)
+{
+	/* At least one HMAC subfunction must be present */
+	return kvm_has_pckmo_subfunc(kvm, 118) ||
+	       kvm_has_pckmo_subfunc(kvm, 122);
+}
+
 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -3211,7 +3835,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
 	vcpu->arch.sie_block->eca &= ~ECA_APIE;
-	vcpu->arch.sie_block->ecd &= ~ECD_ECC;
+	vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
 
 	if (vcpu->kvm->arch.crypto.apie)
 		vcpu->arch.sie_block->eca |= ECA_APIE;
@@ -3219,9 +3843,11 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 	/* Set up protected key support */
 	if (vcpu->kvm->arch.crypto.aes_kw) {
 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
-		/* ecc is also wrapped with AES key */
+		/* ecc/hmac is also wrapped with AES key */
 		if (kvm_has_pckmo_ecc(vcpu->kvm))
 			vcpu->arch.sie_block->ecd |= ECD_ECC;
+		if (kvm_has_pckmo_hmac(vcpu->kvm))
+			vcpu->arch.sie_block->ecd |= ECD_HMAC;
 	}
 
 	if (vcpu->kvm->arch.crypto.dea_kw)
@@ -3230,15 +3856,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
 {
-	free_page(vcpu->arch.sie_block->cbrlo);
+	free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
 	vcpu->arch.sie_block->cbrlo = 0;
 }
 
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
-	if (!vcpu->arch.sie_block->cbrlo)
+	void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+
+	if (!cbrlo_page)
 		return -ENOMEM;
+
+	vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
 	return 0;
 }
 
@@ -3248,7 +3877,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.sie_block->ibc = model->ibc;
 	if (test_kvm_facility(vcpu->kvm, 7))
-		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
+		vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
 }
 
 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -3267,11 +3896,13 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	kvm_s390_vcpu_setup_model(vcpu);
 
-	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
-	if (MACHINE_HAS_ESOP)
+	/* pgste_set_pte has special handling for !machine_has_esop() */
+	if (machine_has_esop())
 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
 	if (test_kvm_facility(vcpu->kvm, 9))
 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
+	if (test_kvm_facility(vcpu->kvm, 11))
+		vcpu->arch.sie_block->ecb |= ECB_PTF;
 	if (test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= ECB_TE;
 	if (!kvm_is_ucontrol(vcpu->kvm))
@@ -3303,9 +3934,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
 	}
-	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
-					| SDNXC;
-	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
+	vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
+	vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
 
 	if (sclp.has_kss)
 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
@@ -3317,13 +3947,15 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 		if (rc)
 			return rc;
 	}
-	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+	hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
+		      HRTIMER_MODE_REL);
 
 	vcpu->arch.sie_block->hpid = HPID_KVM;
 
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
+	kvm_s390_vcpu_pci_setup(vcpu);
+
 	mutex_lock(&vcpu->kvm->lock);
 	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
 		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
@@ -3353,7 +3985,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		return -ENOMEM;
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
-	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+	vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
 
 	/* the real guest size will always be smaller than msl */
 	vcpu->arch.sie_block->mso = 0;
@@ -3373,6 +4005,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_PFAULT |
 				    KVM_SYNC_DIAG318;
+	vcpu->arch.acrs_loaded = false;
 	kvm_s390_set_prefix(vcpu, 0);
 	if (test_kvm_facility(vcpu->kvm, 64))
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
@@ -3383,9 +4016,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (test_kvm_facility(vcpu->kvm, 156))
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
-	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+	 * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
 	 */
-	if (MACHINE_HAS_VX)
+	if (cpu_has_vx())
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 	else
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
@@ -3396,13 +4029,15 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 			goto out_free_sie_block;
 	}
 
-	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
 		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
 
 	rc = kvm_s390_vcpu_setup(vcpu);
 	if (rc)
 		goto out_ucontrol_uninit;
+
+	kvm_s390_update_topology_change_report(vcpu->kvm, 1);
 	return 0;
 
 out_ucontrol_uninit:
@@ -3479,6 +4114,8 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 	unsigned long prefix;
 	unsigned long i;
 
+	trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
+
 	if (gmap_is_shadow(gmap))
 		return;
 	if (start >= 1UL << 31)
@@ -3498,7 +4135,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 {
 	/* do not poll with more than halt_poll_max_steal percent of steal time */
-	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+	if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
 	    READ_ONCE(halt_poll_max_steal)) {
 		vcpu->stat.halt_no_poll_steal++;
 		return true;
@@ -3660,7 +4297,7 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->run->s.regs.fpc = 0;
 	/*
 	 * Do not reset these registers in the protected case, as some of
-	 * them are overlayed and they are not accessible in this case
+	 * them are overlaid and they are not accessible in this case
 	 * anyway.
 	 */
 	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
@@ -3733,18 +4370,13 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 
 	vcpu_load(vcpu);
 
-	if (test_fp_ctl(fpu->fpc)) {
-		ret = -EINVAL;
-		goto out;
-	}
 	vcpu->run->s.regs.fpc = fpu->fpc;
-	if (MACHINE_HAS_VX)
+	if (cpu_has_vx())
 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
 				 (freg_t *) fpu->fprs);
 	else
 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 
-out:
 	vcpu_put(vcpu);
 	return ret;
 }
@@ -3753,9 +4385,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	vcpu_load(vcpu);
 
-	/* make sure we have the latest values */
-	save_fpu_regs();
-	if (MACHINE_HAS_VX)
+	if (cpu_has_vx())
 		convert_vx_to_fp((freg_t *) fpu->fprs,
 				 (__vector128 *) vcpu->run->s.regs.vrs);
 	else
@@ -3885,6 +4515,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
 	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
 }
 
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+	struct kvm *kvm = gmap->private;
+	gfn_t gfn = gpa_to_gfn(gaddr);
+	bool unlocked;		/* written by fixup_user_fault(); not inspected here */
+	hva_t vmaddr;
+	gpa_t tmp;
+	int rc;
+
+	if (kvm_is_ucontrol(kvm)) {	/* ucontrol: the hva lookup uses the translated address */
+		tmp = __gmap_translate(gmap, gaddr);
+		gfn = gpa_to_gfn(tmp);
+	}
+	/* NOTE(review): @flags is unused; the fault below is always FAULT_FLAG_WRITE - confirm */
+	vmaddr = gfn_to_hva(kvm, gfn);
+	rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+	if (!rc)	/* link into the gmap only if the host fault succeeded */
+		rc = __gmap_link(gmap, gaddr, vmaddr);
+	return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect (a u8, so at most 255)
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+			     unsigned long bits)
+{
+	unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+	gpa_t end = gpa + npages * PAGE_SIZE;
+	int rc;
+
+	for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {	/* rc >= 0 here and sets the step */
+		rc = gmap_protect_one(gmap, gpa, prot, bits);	/* NOTE(review): > 0 on success? confirm */
+		if (rc == -EAGAIN) {	/* fault the page in, then retry exactly once */
+			__kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);	/* result ignored */
+			rc = gmap_protect_one(gmap, gpa, prot, bits);
+		}
+		if (rc < 0)	/* includes a second -EAGAIN after the retry */
+			return rc;
+	}
+
+	return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+	gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+	int idx, rc;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	mmap_read_lock(vcpu->arch.gmap->mm);
+	/* Two pages starting at the prefix address: PROT_WRITE plus the MPROT notify bit. */
+	rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+	/* Release in reverse order of acquisition. */
+	mmap_read_unlock(vcpu->arch.gmap->mm);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	return rc;
+}
+
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
 retry:
@@ -3900,9 +4599,8 @@ retry:
 	 */
 	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
 		int rc;
-		rc = gmap_mprotect_notify(vcpu->arch.gmap,
-					  kvm_s390_get_prefix(vcpu),
-					  PAGE_SIZE * 2, PROT_WRITE);
+
+		rc = kvm_s390_mprotect_notify_prefix(vcpu);
 		if (rc) {
 			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
 			return rc;
@@ -3957,8 +4655,6 @@ retry:
 		goto retry;
 	}
 
-	/* nothing to do, just clear the request */
-	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 	/* we left the vsie handler, nothing to do, just clear the request */
 	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
 
@@ -3993,13 +4689,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t
 	preempt_enable();
 }
 
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
-{
-	mutex_lock(&kvm->lock);
-	__kvm_s390_set_tod_clock(kvm, gtod);
-	mutex_unlock(&kvm->lock);
-}
-
 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
 {
 	if (!mutex_trylock(&kvm->lock))
@@ -4009,22 +4698,6 @@ int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clo
 	return 1;
 }
 
-/**
- * kvm_arch_fault_in_page - fault-in guest page if necessary
- * @vcpu: The corresponding virtual cpu
- * @gpa: Guest physical address
- * @writable: Whether the page should be writable or not
- *
- * Make sure that a guest page has been faulted-in on the host.
- *
- * Return: Zero on success, negative error code otherwise.
- */
-long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
-{
-	return gmap_fault(vcpu->arch.gmap, gpa,
-			  writable ? FAULT_FLAG_WRITE : 0);
-}
-
 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
 				      unsigned long token)
 {
@@ -4092,12 +4765,11 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.gmap->pfault_enabled)
 		return false;
 
-	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
-	hva += current->thread.gmap_addr & ~PAGE_MASK;
+	hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
 		return false;
 
-	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+	return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
 }
 
 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -4119,7 +4791,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
-		if (rc)
+		if (rc || guestdbg_exit_pending(vcpu))
 			return rc;
 	}
 
@@ -4135,6 +4807,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
 
 	vcpu->arch.sie_block->icptcode = 0;
+	current->thread.gmap_int_code = 0;
 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
@@ -4142,7 +4815,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_pgm_info pgm_info = {
 		.code = PGM_ADDRESSING,
@@ -4178,10 +4851,182 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
 }
 
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{	/* Raise KVM_BUG on vcpu->kvm unless the recorded TEID names the primary address space. */
+	KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+		"Unexpected program interrupt 0x%x, TEID 0x%016lx",
+		current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/**
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags; FOLL_WRITE requests a write fault, FOLL_NOWAIT is added if pfault is on
+ *
+ * Return: 0 on success, < 0 in case of error; -EAGAIN is returned when a signal is pending.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+	struct kvm_memory_slot *slot;
+	unsigned int fault_flags;
+	bool writable, unlocked;
+	unsigned long vmaddr;
+	struct page *page;
+	kvm_pfn_t pfn;
+	int rc;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)	/* no usable memslot for @gfn */
+		return vcpu_post_run_addressing_exception(vcpu);
+
+	fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;	/* FOLL_* -> FAULT_FLAG_* */
+	if (vcpu->arch.gmap->pfault_enabled)	/* lets the first attempt report "needs I/O" */
+		flags |= FOLL_NOWAIT;
+	vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+	pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+	/* Access outside memory, inject addressing exception */
+	if (is_noslot_pfn(pfn))
+		return vcpu_post_run_addressing_exception(vcpu);
+	/* Signal pending: try again */
+	if (pfn == KVM_PFN_ERR_SIGPENDING)
+		return -EAGAIN;
+
+	/* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
+	if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+		trace_kvm_s390_major_guest_pfault(vcpu);
+		if (kvm_arch_setup_async_pf(vcpu))
+			return 0;
+		vcpu->stat.pfault_sync++;
+		/* Could not setup async pfault, try again synchronously */
+		flags &= ~FOLL_NOWAIT;
+		goto try_again;
+	}
+	/* Any other error */
+	if (is_error_pfn(pfn))
+		return -EFAULT;
+
+	/* Success */
+	mmap_read_lock(vcpu->arch.gmap->mm);
+	/* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+	rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+	if (!rc)
+		rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+	scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {	/* page released under mmu_lock */
+		kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+	}
+	mmap_read_unlock(vcpu->arch.gmap->mm);
+	return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+	unsigned long gaddr_tmp;
+	gfn_t gfn;
+
+	gfn = gpa_to_gfn(gaddr);	/* default: the memslot gfn follows @gaddr directly */
+	if (kvm_is_ucontrol(vcpu->kvm)) {
+		/*
+		 * This translates the per-vCPU guest address into a
+		 * fake guest address, which can then be used with the
+		 * fake memslots that are identity mapping userspace.
+		 * This allows ucontrol VMs to use the normal fault
+		 * resolution path, like normal VMs.
+		 */
+		mmap_read_lock(vcpu->arch.gmap->mm);
+		gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+		mmap_read_unlock(vcpu->arch.gmap->mm);
+		if (gaddr_tmp == -EFAULT) {	/* no translation: report the fault to userspace */
+			vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+			vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+			vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+			return -EREMOTE;
+		}
+		gfn = gpa_to_gfn(gaddr_tmp);	/* memslot lookup uses the translated address */
+	}
+	return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);	/* @gaddr stays untranslated */
+}
+
+static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
+{	/* Dispatch on the program interruption code stored in current->thread.gmap_int_code. */
+	unsigned int flags = 0;		/* FOLL_* flags handed to vcpu_dat_fault_handler() */
+	unsigned long gaddr;
+	int rc;
+
+	gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;	/* page-granular fault address */
+	if (kvm_s390_cur_gmap_fault_is_write())
+		flags = FOLL_WRITE;	/* the DAT fault path tests FOLL_WRITE, not FAULT_FLAG_WRITE */
+
+	switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
+	case 0:	/* no program interruption code was recorded */
+		vcpu->stat.exit_null++;
+		break;
+	case PGM_SECURE_STORAGE_ACCESS:
+	case PGM_SECURE_STORAGE_VIOLATION:
+		kvm_s390_assert_primary_as(vcpu);
+		/*
+		 * This can happen after a reboot with asynchronous teardown;
+		 * the new guest (normal or protected) will run on top of the
+		 * previous protected guest. The old pages need to be destroyed
+		 * so the new guest can use them.
+		 */
+		if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
+			/*
+			 * Either KVM messed up the secure guest mapping or the
+			 * same page is mapped into multiple secure guests.
+			 *
+			 * This exception is only triggered when a guest 2 is
+			 * running and can therefore never occur in kernel
+			 * context.
+			 */
+			pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
+					    current->thread.gmap_int_code, current->comm,
+					    current->pid);
+			send_sig(SIGSEGV, current, 0);
+		}
+		break;
+	case PGM_NON_SECURE_STORAGE_ACCESS:
+		kvm_s390_assert_primary_as(vcpu);
+		/*
+		 * This is normal operation; a page belonging to a protected
+		 * guest has not been imported yet. Try to import the page into
+		 * the protected guest.
+		 */
+		rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
+		if (rc == -EINVAL)
+			send_sig(SIGSEGV, current, 0);
+		if (rc != -ENXIO)
+			break;
+		flags = FOLL_WRITE;	/* -ENXIO: fall through and resolve it as a write fault */
+		fallthrough;
+	case PGM_PROTECTION:
+	case PGM_SEGMENT_TRANSLATION:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_ASCE_TYPE:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+		kvm_s390_assert_primary_as(vcpu);
+		return vcpu_dat_fault_handler(vcpu, gaddr, flags);
+	default:
+		KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+			current->thread.gmap_int_code, current->thread.gmap_teid.val);
+		send_sig(SIGSEGV, current, 0);
+		break;
+	}
+	return 0;
+}
+
 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 {
 	struct mcck_volatile_info *mcck_info;
 	struct sie_page *sie_page;
+	int rc;
 
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
 		   vcpu->arch.sie_block->icptcode);
@@ -4203,7 +5048,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 	}
 
 	if (vcpu->arch.sie_block->icptcode > 0) {
-		int rc = kvm_handle_sie_intercept(vcpu);
+		rc = kvm_handle_sie_intercept(vcpu);
 
 		if (rc != -EOPNOTSUPP)
 			return rc;
@@ -4212,24 +5057,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
 		return -EREMOTE;
-	} else if (exit_reason != -EFAULT) {
-		vcpu->stat.exit_null++;
-		return 0;
-	} else if (kvm_is_ucontrol(vcpu->kvm)) {
-		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
-		vcpu->run->s390_ucontrol.trans_exc_code =
-						current->thread.gmap_addr;
-		vcpu->run->s390_ucontrol.pgm_code = 0x10;
-		return -EREMOTE;
-	} else if (current->thread.gmap_pfault) {
-		trace_kvm_s390_major_guest_pfault(vcpu);
-		current->thread.gmap_pfault = 0;
-		if (kvm_arch_setup_async_pf(vcpu))
-			return 0;
-		vcpu->stat.pfault_sync++;
-		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
 	}
-	return vcpu_post_run_fault_in_sie(vcpu);
+
+	return vcpu_post_run_handle_fault(vcpu);
 }
 
 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
@@ -4246,7 +5076,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
 	do {
 		rc = vcpu_pre_run(vcpu);
-		if (rc)
+		if (rc || guestdbg_exit_pending(vcpu))
 			break;
 
 		kvm_vcpu_srcu_read_unlock(vcpu);
@@ -4263,10 +5093,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			       vcpu->run->s.regs.gprs,
 			       sizeof(sie_page->pv_grregs));
 		}
-		if (test_cpu_flag(CIF_FPU))
-			load_fpu_regs();
 		exit_reason = sie64a(vcpu->arch.sie_block,
-				     vcpu->run->s.regs.gprs);
+				     vcpu->run->s.regs.gprs,
+				     vcpu->arch.gmap->asce);
 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
 			memcpy(vcpu->run->s.regs.gprs,
 			       sie_page->pv_grregs,
@@ -4351,9 +5180,9 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
 	}
-	if (MACHINE_HAS_GS) {
+	if (cpu_has_gs()) {
 		preempt_disable();
-		__ctl_set_bit(2, 4);
+		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
 		if (current->thread.gs_cb) {
 			vcpu->arch.host_gscb = current->thread.gs_cb;
 			save_gs_cb(vcpu->arch.host_gscb);
@@ -4385,19 +5214,8 @@ static void sync_regs(struct kvm_vcpu *vcpu)
 	}
 	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
-	/* save host (userspace) fprs/vrs */
-	save_fpu_regs();
-	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
-	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
-	if (MACHINE_HAS_VX)
-		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
-	else
-		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
-	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-	if (test_fp_ctl(current->thread.fpu.fpc))
-		/* User space provided an invalid FPC, let's clear it */
-		current->thread.fpu.fpc = 0;
-
+	vcpu->arch.acrs_loaded = true;
+	kvm_s390_fpu_load(vcpu->run);
 	/* Sync fmt2 only data */
 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
 		sync_regs_fmt2(vcpu);
@@ -4428,15 +5246,15 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu)
 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
 	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
-	if (MACHINE_HAS_GS) {
+	if (cpu_has_gs()) {
 		preempt_disable();
-		__ctl_set_bit(2, 4);
+		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
 		if (vcpu->arch.gs_enabled)
 			save_gs_cb(current->thread.gs_cb);
 		current->thread.gs_cb = vcpu->arch.host_gscb;
 		restore_gs_cb(vcpu->arch.host_gscb);
 		if (!vcpu->arch.host_gscb)
-			__ctl_clear_bit(2, 4);
+			local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
 		vcpu->arch.host_gscb = NULL;
 		preempt_enable();
 	}
@@ -4458,12 +5276,8 @@ static void store_regs(struct kvm_vcpu *vcpu)
 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
-	/* Save guest register state */
-	save_fpu_regs();
-	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-	/* Restore will be done lazily at return */
-	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
-	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+	vcpu->arch.acrs_loaded = false;
+	kvm_s390_fpu_store(vcpu->run);
 	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
 		store_regs_fmt2(vcpu);
 }
@@ -4471,9 +5285,19 @@ static void store_regs(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
+	DECLARE_KERNEL_FPU_ONSTACK32(fpu);
 	int rc;
 
-	if (kvm_run->immediate_exit)
+	/*
+	 * Running a VM while dumping always has the potential to
+	 * produce inconsistent dump data. But for PV vcpus a SIE
+	 * entry while dumping could also lead to a fatal validity
+	 * intercept which we absolutely want to avoid.
+	 */
+	if (vcpu->kvm->arch.pv.dumping)
+		return -EINVAL;
+
+	if (!vcpu->wants_to_run)
 		return -EINTR;
 
 	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
@@ -4503,6 +5327,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
 	sync_regs(vcpu);
 	enable_cpu_timer_accounting(vcpu);
 
@@ -4526,6 +5351,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	disable_cpu_timer_accounting(vcpu);
 	store_regs(vcpu);
+	kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
 
 	kvm_sigset_deactivate(vcpu);
 
@@ -4562,7 +5388,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 		gpa -= __LC_FPREGS_SAVE_AREA;
 
 	/* manually convert vector registers if necessary */
-	if (MACHINE_HAS_VX) {
+	if (cpu_has_vx()) {
 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
 				     fprs, 128);
@@ -4600,8 +5426,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * switch in the run ioctl. Let's update our copies before we save
 	 * it into the save area
 	 */
-	save_fpu_regs();
-	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+	kvm_s390_fpu_store(vcpu->run);
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -4768,6 +5593,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
 				  struct kvm_s390_mem_op *mop)
 {
 	void __user *uaddr = (void __user *)mop->buf;
+	void *sida_addr;
 	int r = 0;
 
 	if (mop->flags || !mop->size)
@@ -4779,16 +5605,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
 		return -EINVAL;
 
+	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
+
 	switch (mop->op) {
 	case KVM_S390_MEMOP_SIDA_READ:
-		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
-				 mop->sida_offset), mop->size))
+		if (copy_to_user(uaddr, sida_addr, mop->size))
 			r = -EFAULT;
 
 		break;
 	case KVM_S390_MEMOP_SIDA_WRITE:
-		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
-				   mop->sida_offset), uaddr, mop->size))
+		if (copy_from_user(sida_addr, uaddr, mop->size))
 			r = -EFAULT;
 		break;
 	}
@@ -4799,62 +5625,54 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
 				 struct kvm_s390_mem_op *mop)
 {
 	void __user *uaddr = (void __user *)mop->buf;
+	enum gacc_mode acc_mode;
 	void *tmpbuf = NULL;
-	int r = 0;
-	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
-				    | KVM_S390_MEMOP_F_CHECK_ONLY
-				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
+	int r;
 
-	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
+					KVM_S390_MEMOP_F_CHECK_ONLY |
+					KVM_S390_MEMOP_F_SKEY_PROTECTION);
+	if (r)
+		return r;
+	if (mop->ar >= NUM_ACRS)
 		return -EINVAL;
-	if (mop->size > MEM_OP_MAX_SIZE)
-		return -E2BIG;
 	if (kvm_s390_pv_cpu_is_protected(vcpu))
 		return -EINVAL;
-	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
-		if (access_key_invalid(mop->key))
-			return -EINVAL;
-	} else {
-		mop->key = 0;
-	}
 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
 		tmpbuf = vmalloc(mop->size);
 		if (!tmpbuf)
 			return -ENOMEM;
 	}
 
-	switch (mop->op) {
-	case KVM_S390_MEMOP_LOGICAL_READ:
-		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
-			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
-					    GACC_FETCH, mop->key);
-			break;
-		}
+	acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
+	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+		r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+				    acc_mode, mop->key);
+		goto out_inject;
+	}
+	if (acc_mode == GACC_FETCH) {
 		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
 					mop->size, mop->key);
-		if (r == 0) {
-			if (copy_to_user(uaddr, tmpbuf, mop->size))
-				r = -EFAULT;
-		}
-		break;
-	case KVM_S390_MEMOP_LOGICAL_WRITE:
-		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
-			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
-					    GACC_STORE, mop->key);
-			break;
+		if (r)
+			goto out_inject;
+		if (copy_to_user(uaddr, tmpbuf, mop->size)) {
+			r = -EFAULT;
+			goto out_free;
 		}
+	} else {
 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
 			r = -EFAULT;
-			break;
+			goto out_free;
 		}
 		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
 					 mop->size, mop->key);
-		break;
 	}
 
+out_inject:
 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 
+out_free:
 	vfree(tmpbuf);
 	return r;
 }
@@ -4889,6 +5707,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 {
 	struct kvm_vcpu *vcpu = filp->private_data;
 	void __user *argp = (void __user *)arg;
+	int rc;
 
 	switch (ioctl) {
 	case KVM_S390_IRQ: {
@@ -4896,7 +5715,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 
 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
 			return -EFAULT;
-		return kvm_s390_inject_vcpu(vcpu, &s390irq);
+		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
 	}
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
@@ -4906,10 +5726,67 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 			return -EFAULT;
 		if (s390int_to_s390irq(&s390int, &s390irq))
 			return -EINVAL;
-		return kvm_s390_inject_vcpu(vcpu, &s390irq);
+		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
 	}
+	default:
+		rc = -ENOIOCTLCMD;
+		break;
 	}
-	return -ENOIOCTLCMD;
+
+	/*
+	 * To simplify single stepping of userspace-emulated instructions,
+	 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+	 * should_handle_per_ifetch()). However, if userspace emulation injects
+	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+	 * after (and not before) the interrupt delivery.
+	 */
+	if (!rc)
+		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+	return rc;
+}
+
+static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
+					struct kvm_pv_cmd *cmd)
+{
+	struct kvm_s390_pv_dmp dmp;
+	void *data;
+	int ret;
+
+	/* Dump initialization is a prerequisite */
+	if (!vcpu->kvm->arch.pv.dumping)
+		return -EINVAL;
+	/* cmd->data is a user pointer to the dump request descriptor */
+	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
+		return -EFAULT;
+
+	/* We only handle this subcmd right now */
+	if (dmp.subcmd != KVM_PV_DUMP_CPU)
+		return -EINVAL;
+
+	/* CPU dump length is the same as create cpu storage donation. */
+	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
+		return -EINVAL;
+
+	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);	/* kernel-side bounce buffer */
+	if (!data)
+		return -ENOMEM;
+
+	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
+
+	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
+		   vcpu->vcpu_id, cmd->rc, cmd->rrc);
+
+	if (ret)
+		ret = -EINVAL;	/* every dump failure is reported as -EINVAL */
+
+	/* On success copy over the dump data */
+	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
+		ret = -EFAULT;
+
+	kvfree(data);	/* freed on success and on failure alike */
+	return ret;
 }
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
@@ -5021,7 +5898,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	}
 #endif
 	case KVM_S390_VCPU_FAULT: {
-		r = gmap_fault(vcpu->arch.gmap, arg, 0);
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = vcpu_dat_fault_handler(vcpu, arg, 0);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	}
 	case KVM_ENABLE_CAP:
@@ -5076,6 +5955,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 					   irq_state.len);
 		break;
 	}
+	case KVM_S390_PV_CPU_COMMAND: {
+		struct kvm_pv_cmd cmd;
+
+		r = -EINVAL;
+		if (!is_prot_virt_host())
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&cmd, argp, sizeof(cmd)))
+			break;
+
+		r = -EINVAL;
+		if (cmd.flags)
+			break;
+
+		/* We only handle this cmd right now */
+		if (cmd.cmd != KVM_PV_DUMP)
+			break;
+
+		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
+
+		/* Always copy over UV rc / rrc data */
+		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
+				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
+			r = -EFAULT;
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -5097,6 +6003,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	return true;	/* unconditional: @kvm is not consulted */
+}
+
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   const struct kvm_memory_slot *old,
@@ -5105,27 +6016,47 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
 	gpa_t size;
 
+	if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
+		return -EINVAL;
+
 	/* When we are protected, we should not change the memory slots */
 	if (kvm_s390_pv_get_handle(kvm))
 		return -EINVAL;
 
-	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
-		return 0;
+	if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
+		/*
+		 * A few sanity checks. We can have memory slots which have to be
+		 * located/ended at a segment boundary (1MB). The memory in userland is
+		 * ok to be fragmented into various different vmas. It is okay to mmap()
+		 * and munmap() stuff in this slot after doing this call at any time
+		 */
+
+		if (new->userspace_addr & 0xffffful)
+			return -EINVAL;
 
-	/* A few sanity checks. We can have memory slots which have to be
-	   located/ended at a segment boundary (1MB). The memory in userland is
-	   ok to be fragmented into various different vmas. It is okay to mmap()
-	   and munmap() stuff in this slot after doing this call at any time */
+		size = new->npages * PAGE_SIZE;
+		if (size & 0xffffful)
+			return -EINVAL;
 
-	if (new->userspace_addr & 0xffffful)
-		return -EINVAL;
+		if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
+			return -EINVAL;
+	}
 
-	size = new->npages * PAGE_SIZE;
-	if (size & 0xffffful)
-		return -EINVAL;
+	if (!kvm->arch.migration_mode)
+		return 0;
 
-	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
-		return -EINVAL;
+	/*
+	 * Turn off migration mode when:
+	 * - userspace creates a new memslot with dirty logging off,
+	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
+	 *   dirty logging is turned off.
+	 * Migration mode expects dirty page logging being enabled to store
+	 * its dirty bitmap.
+	 */
+	if (change != KVM_MR_DELETE &&
+	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		WARN(kvm_s390_vm_stop_migration(kvm),
+		     "Failed to stop migration mode");
 
 	return 0;
 }
@@ -5137,6 +6068,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 {
 	int rc = 0;
 
+	if (kvm_is_ucontrol(kvm))
+		return;
+
 	switch (change) {
 	case KVM_MR_DELETE:
 		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
@@ -5172,7 +6106,7 @@ static inline unsigned long nonhyp_mask(int i)
 
 static int __init kvm_s390_init(void)
 {
-	int i;
+	int i, r;
 
 	if (!sclp.has_sief2) {
 		pr_info("SIE is not available\n");
@@ -5188,12 +6122,23 @@ static int __init kvm_s390_init(void)
 		kvm_s390_fac_base[i] |=
 			stfle_fac_list[i] & nonhyp_mask(i);
 
-	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	r = __kvm_s390_init();
+	if (r)
+		return r;
+
+	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	if (r) {
+		__kvm_s390_exit();
+		return r;
+	}
+	return 0;
 }
 
 static void __exit kvm_s390_exit(void)
 {
 	kvm_exit();
+
+	__kvm_s390_exit();	/* reverse of the setup order in kvm_s390_init() */
 }
 
 module_init(kvm_s390_init);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 497d52a83c78..c44fe0c3a097 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -20,10 +20,31 @@
 #include <asm/processor.h>
 #include <asm/sclp.h>
 
+#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0)
+
+static inline void kvm_s390_fpu_store(struct kvm_run *run)
+{	/* Store the current FPC and FP/vector registers into @run->s.regs. */
+	fpu_stfpc(&run->s.regs.fpc);
+	if (cpu_has_vx())	/* with vector support the vrs array is written */
+		save_vx_regs((__vector128 *)&run->s.regs.vrs);
+	else			/* otherwise only the fprs array is written */
+		save_fp_regs((freg_t *)&run->s.regs.fprs);
+}
+
+static inline void kvm_s390_fpu_load(struct kvm_run *run)
+{	/* Load the FPC and FP/vector registers from @run->s.regs. */
+	fpu_lfpc_safe(&run->s.regs.fpc);	/* NOTE(review): presumably tolerates a bad FPC - confirm */
+	if (cpu_has_vx())	/* with vector support the vrs array is the source */
+		load_vx_regs((__vector128 *)&run->s.regs.vrs);
+	else			/* otherwise the fprs array is the source */
+		load_fp_regs((freg_t *)&run->s.regs.fprs);
+}
+
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & ECB_TE))
 #define TDB_FORMAT1		1
-#define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+#define IS_ITDB_VALID(vcpu) \
+	((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1))
 
 extern debug_info_t *kvm_s390_dbf;
 extern debug_info_t *kvm_s390_dbf_uv;
@@ -119,6 +140,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
 	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
+static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+	u32 base1 = vcpu->arch.sie_block->ipb >> 28;	/* base register number */
+	s64 disp1;
+
+	/* The displacement is a 20bit _SIGNED_ value: low 12 bits plus high 8 bits from ipb */
+	disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+			      ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19);
+	/* @ar is optional; when given it receives the base register number */
+	if (ar)
+		*ar = base1;
+
+	return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;	/* base 0 contributes 0 */
+}
+
 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 					      u64 *address1, u64 *address2,
 					      u8 *ar_b1, u8 *ar_b2)
@@ -233,16 +269,33 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
 
 static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
 {
-	u32 gd = (u32)(u64)kvm->arch.gisa_int.origin;
+	u32 gd;
+
+	if (!kvm->arch.gisa_int.origin)
+		return 0;
+
+	gd = virt_to_phys(kvm->arch.gisa_int.origin);
 
 	if (gd && sclp.has_gisaf)
 		gd |= GISA_FORMAT1;
 	return gd;
 }
 
+static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa)
+{
+	hva_t page_hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+	if (kvm_is_error_hva(page_hva))
+		return page_hva;	/* hand the error value back unchanged */
+	return page_hva | offset_in_page(gpa);
+}
+
 /* implemented in pv.c */
 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
 int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc);
 int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
 int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
 int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
@@ -250,6 +303,14 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
 int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
 		       unsigned long tweak, u16 *rc, u16 *rrc);
 int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+			      u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
 
 static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
 {
@@ -261,16 +322,39 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
 	return vcpu->arch.pv.handle;
 }
 
-static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
 {
-	lockdep_assert_held(&kvm->lock);
-	return !!kvm_s390_pv_get_handle(kvm);
-}
+	struct folio *folio = page_folio(page);
+	int rc;
 
-static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
-{
-	lockdep_assert_held(&vcpu->mutex);
-	return !!kvm_s390_pv_cpu_get_handle(vcpu);
+	/* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+	if (folio_test_large(folio))
+		return -EFAULT;
+
+	rc = uv_destroy_folio(folio);
+	/*
+	 * Fault handlers can race; it is possible that two CPUs will fault
+	 * on the same secure page. One CPU can destroy the page, reboot,
+	 * re-enter secure mode and import it, while the second CPU was
+	 * stuck at the beginning of the handler. At some point the second
+	 * CPU will be able to progress, and it will not be able to destroy
+	 * the page. In that case we do not want to terminate the process,
+	 * we instead try to export the page.
+	 */
+	if (rc)
+		rc = uv_convert_from_secure_folio(folio);
+
+	return rc;
 }
 
 /* implemented in interrupt.c */
@@ -352,15 +436,17 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
 				 unsigned long end);
 void kvm_s390_vsie_init(struct kvm *kvm);
 void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
-long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
@@ -374,6 +460,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags);
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+			     unsigned long bits);
+
+static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags)
+{
+	return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags);
+}
 
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@@ -461,7 +556,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm);
 void kvm_s390_gisa_destroy(struct kvm *kvm);
 void kvm_s390_gisa_disable(struct kvm *kvm);
 void kvm_s390_gisa_enable(struct kvm *kvm);
-int kvm_s390_gib_init(u8 nisc);
+int __init kvm_s390_gib_init(u8 nisc);
 void kvm_s390_gib_destroy(void);
 
 /* implemented in guestdbg.c */
@@ -494,6 +589,13 @@ static inline int kvm_s390_use_sca_entries(void)
 void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 				     struct mcck_volatile_info *mcck_info);
 
+static inline bool kvm_s390_cur_gmap_fault_is_write(void)
+{
+	/* A protection exception always counts as a write fault. */
+	return current->thread.gmap_int_code == PGM_PROTECTION ||
+	       (test_facility(75) && current->thread.gmap_teid.fsi == TEID_FSI_STORE);
+}
+
 /**
  * kvm_s390_vcpu_crypto_reset_all
  *
@@ -508,6 +610,16 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
 
 /**
+ * kvm_s390_vcpu_pci_enable_interp
+ *
+ * Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
+ * interpretation as well as adapter interruption forwarding.
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
+
+/**
  * diag9c_forwarding_hz
  *
  * Set the maximum number of diag9c forwarding per second
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
new file mode 100644
index 000000000000..8c40154ff50f
--- /dev/null
+++ b/arch/s390/kvm/pci.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ *    Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <asm/pci.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_io.h>
+#include <asm/sclp.h>
+#include "pci.h"
+#include "kvm-s390.h"
+
+struct zpci_aift *aift;
+
+static inline int __set_irq_noiib(u16 ctl, u8 isc)
+{
+	union zpci_sic_iib empty_iib = {{0}};	/* all-zero iib */
+
+	return zpci_set_irq_ctrl(ctl, isc, &empty_iib);
+}
+
+void kvm_s390_pci_aen_exit(void)
+{
+	struct kvm_zdev **stale_kzdev;
+	unsigned long irq_flags;
+
+	lockdep_assert_held(&aift->aift_lock);
+
+	/*
+	 * The aipb contents stay registered with firmware for the life of
+	 * the host kernel; zpci_aipb and zpci_aif_sbv keep that information
+	 * around in case the KVM module is inserted again later.  Only the
+	 * AIFT references are dropped here, and only memory that was never
+	 * registered with the underlying firmware is freed.
+	 */
+	spin_lock_irqsave(&aift->gait_lock, irq_flags);
+	stale_kzdev = aift->kzdev;
+	aift->kzdev = NULL;
+	aift->sbv = NULL;
+	aift->gait = NULL;
+	spin_unlock_irqrestore(&aift->gait_lock, irq_flags);
+
+	kfree(stale_kzdev);
+}
+
+/* Allocate the aipb, summary bit vector and GAIT, then register them for AENI. */
+static int zpci_setup_aipb(u8 nisc)
+{
+	struct page *page;
+	int size, rc = -ENOMEM;
+
+	zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
+	if (!zpci_aipb)
+		return -ENOMEM;
+
+	aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
+	if (!aift->sbv)
+		goto free_aipb;
+	zpci_aif_sbv = aift->sbv;
+	size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
+						sizeof(struct zpci_gaite)));
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
+	if (!page)
+		goto free_sbv;
+	aift->gait = (struct zpci_gaite *)page_to_virt(page);
+
+	zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
+	zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
+	zpci_aipb->aipb.afi = nisc;
+	zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;
+
+	/* Setup Adapter Event Notification Interpretation */
+	if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
+		rc = -EIO;
+		goto free_gait;
+	}
+
+	return 0;
+
+	/* Unwind; clear the aift pointers so they never refer to freed memory */
+free_gait:
+	free_pages((unsigned long)aift->gait, size);
+	aift->gait = NULL;
+free_sbv:
+	airq_iv_release(aift->sbv);
+	aift->sbv = NULL;
+	zpci_aif_sbv = NULL;
+free_aipb:
+	kfree(zpci_aipb);
+	zpci_aipb = NULL;
+
+	return rc;
+}
+
+static int zpci_reset_aipb(u8 nisc)
+{
+	/*
+	 * AEN can be registered only once per system boot, so an existing
+	 * aipb means registration already happened - which is only possible
+	 * when the KVM module was removed and inserted again.  The aipb
+	 * contents are reused in that case, but the forwarding ISC, which
+	 * is assigned during KVM module load, has to be the same one that
+	 * is recorded in the aipb.
+	 */
+	if (nisc == zpci_aipb->aipb.afi) {
+		aift->gait = phys_to_virt(zpci_aipb->aipb.gait);
+		aift->sbv = zpci_aif_sbv;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Enable AEN for forwarding ISC @nisc; returns 0 or a negative errno. */
+int kvm_s390_pci_aen_init(u8 nisc)
+{
+	int rc = 0;
+
+	/* If already enabled for AEN, bail out now */
+	if (aift->gait || aift->sbv)
+		return -EPERM;
+
+	mutex_lock(&aift->aift_lock);
+	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(*aift->kzdev), GFP_KERNEL);
+	if (!aift->kzdev) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	if (!zpci_aipb)
+		rc = zpci_setup_aipb(nisc);
+	else
+		rc = zpci_reset_aipb(nisc);
+	if (rc)
+		goto free_zdev;
+
+	/* Enable floating IRQs */
+	if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
+		rc = -EIO;
+		kvm_s390_pci_aen_exit();
+	}
+	goto unlock;
+
+free_zdev:
+	kfree(aift->kzdev);
+	aift->kzdev = NULL;	/* kvm_s390_pci_si_to_kvm() tests this pointer */
+unlock:
+	mutex_unlock(&aift->aift_lock);
+	return rc;
+}
+
+/* Modify PCI: register forwarding of floating adapter interruptions */
+static int kvm_zpci_set_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+	struct zpci_fib fib = {};	/* aibvo keeps its initial value of 0 */
+	u8 status;
+
+	fib.gd = zdev->gisa;
+	fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
+	fib.fmt0.sum = 1;       /* enable summary notifications */
+	fib.fmt0.noi = airq_iv_end(zdev->aibv);
+	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+	fib.fmt0.aisbo = zdev->aisb & 63;
+	if (zpci_mod_fc(req, &fib, &status))
+		return -EIO;
+	return 0;
+}
+
+/* Modify PCI: unregister forwarding of floating adapter interruptions */
+static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+	struct zpci_fib fib = {};
+	u8 cc, status;
+
+	fib.gd = zdev->gisa;
+	cc = zpci_mod_fc(req, &fib, &status);
+
+	/* Function already gone or IRQs already deregistered: not an error. */
+	if (cc == 3 || (cc == 1 && status == 24))
+		return 0;
+
+	return cc ? -EIO : 0;
+}
+
+static inline void unaccount_mem(unsigned long nr_pages)
+{
+	struct user_struct *owner = get_uid(current_user());	/* NOTE(review): ref never put? */
+
+	if (owner != NULL)
+		atomic_long_sub(nr_pages, &owner->locked_vm);
+	if (current->mm != NULL)
+		atomic64_sub(nr_pages, &current->mm->pinned_vm);
+}
+
+static inline int account_mem(unsigned long nr_pages)
+{
+	struct user_struct *user = get_uid(current_user());	/* NOTE(review): ref never put? */
+	unsigned long page_limit, cur_pages, new_pages;
+
+	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;	/* limit in pages */
+
+	cur_pages = atomic_long_read(&user->locked_vm);
+	do {
+		new_pages = cur_pages + nr_pages;
+		if (new_pages > page_limit)
+			return -ENOMEM;	/* would exceed RLIMIT_MEMLOCK */
+	} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
+
+	atomic64_add(nr_pages, &current->mm->pinned_vm);	/* mm is not NULL-checked here */
+
+	return 0;
+}
+
+/*
+ * Pin the guest AIBV/AISB, fill in the GAIT entry for @zdev and register the
+ * forwarding with firmware; @fib is rewritten with host addresses.  Returns 0
+ * or a negative errno, with pinning, accounting and summary bit rolled back.
+ * NOTE(review): aift->gait is typed, so the GAIT index is scaled twice - confirm.
+ */
+static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, bool assist)
+{
+	struct page *pages[1], *aibv_page, *aisb_page = NULL;
+	unsigned int msi_vecs, idx;
+	struct zpci_gaite *gaite;
+	unsigned long hva, bit;
+	struct kvm *kvm;
+	int rc = 0, gisc, npages, pcount = 0;
+
+	/* Interrupt forwarding requires a device already enabled for interpretation */
+	if (zdev->gisa == 0)
+		return -EINVAL;
+
+	kvm = zdev->kzdev->kvm;
+	msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
+
+	/* Get the associated forwarding ISC - if invalid, return the error */
+	gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
+	if (gisc < 0)
+		return gisc;
+
+	/* Replace AIBV address */
+	idx = srcu_read_lock(&kvm->srcu);
+	hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
+	npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
+	srcu_read_unlock(&kvm->srcu, idx);
+	if (npages < 1) {
+		rc = -EIO;
+		goto out;
+	}
+	aibv_page = pages[0];
+	pcount++;
+	fib->fmt0.aibv = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
+
+	/* Pin the guest AISB if one was specified */
+	if (fib->fmt0.sum == 1) {
+		idx = srcu_read_lock(&kvm->srcu);
+		hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
+		npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
+		srcu_read_unlock(&kvm->srcu, idx);
+		if (npages < 1) {
+			rc = -EIO;
+			goto unpin1;
+		}
+		aisb_page = pages[0];
+		pcount++;
+	}
+
+	/* Account for pinned pages, roll back on failure */
+	rc = account_mem(pcount);
+	if (rc)
+		goto unpin2;
+
+	/* AISB must be allocated before we can fill in GAITE */
+	mutex_lock(&aift->aift_lock);
+	bit = airq_iv_alloc_bit(aift->sbv);
+	if (bit == -1UL) {
+		rc = -EIO;
+		goto unlock;
+	}
+	zdev->aisb = bit; /* store the summary bit number */
+	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK | AIRQ_IV_GUESTVEC,
+				    phys_to_virt(fib->fmt0.aibv));
+	if (!zdev->aibv) {
+		rc = -ENOMEM;
+		goto free_bit;
+	}
+
+	spin_lock_irq(&aift->gait_lock);
+	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * sizeof(struct zpci_gaite));
+	/* If assist not requested, host will get all alerts */
+	gaite->gisa = assist ? (u32)virt_to_phys(&kvm->arch.sie_page2->gisa) : 0;
+	gaite->gisc = fib->fmt0.isc;
+	gaite->count++;
+	gaite->aisbo = fib->fmt0.aisbo;
+	/* Without a pinned AISB page the entry records no summary address (0) */
+	gaite->aisb = aisb_page ? page_to_phys(aisb_page) + (fib->fmt0.aisb & ~PAGE_MASK) : 0;
+	aift->kzdev[zdev->aisb] = zdev->kzdev;
+	spin_unlock_irq(&aift->gait_lock);
+
+	/* Update guest FIB for re-issue */
+	fib->fmt0.aisbo = zdev->aisb & 63;
+	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+	fib->fmt0.isc = gisc;
+
+	/* Save some guest fib values in the host for later use */
+	zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
+	zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
+	mutex_unlock(&aift->aift_lock);
+
+	/* Issue the clp to setup the irq now */
+	return kvm_zpci_set_airq(zdev);
+
+free_bit:
+	airq_iv_free_bit(aift->sbv, zdev->aisb);
+	zdev->aisb = 0;
+unlock:
+	mutex_unlock(&aift->aift_lock);
+	unaccount_mem(pcount);
+unpin2:
+	if (fib->fmt0.sum == 1)
+		unpin_user_page(aisb_page);
+unpin1:
+	unpin_user_page(aibv_page);
+out:
+	return rc;
+}
+
+static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
+{
+	struct kvm_zdev *kzdev = zdev->kzdev;
+	struct zpci_gaite *gaite;
+	struct page *vpage = NULL, *spage = NULL;
+	int rc, pcount = 0;
+	u8 isc;
+
+	if (zdev->gisa == 0)
+		return -EINVAL;	/* same precondition as kvm_s390_pci_aif_enable() */
+
+	mutex_lock(&aift->aift_lock);
+
+	/*
+	 * If the clear fails due to an error, leave now unless we know this
+	 * device is about to go away (force) -- In that case clear the GAITE
+	 * regardless.
+	 */
+	rc = kvm_zpci_clear_airq(zdev);
+	if (rc && !force)
+		goto out;
+
+	if (zdev->kzdev->fib.fmt0.aibv == 0)
+		goto out;	/* no AIBV saved, so there is nothing to tear down */
+	spin_lock_irq(&aift->gait_lock);
+	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+						   sizeof(struct zpci_gaite));
+	isc = gaite->gisc;	/* ISC stored in the entry by aif_enable */
+	gaite->count--;
+	if (gaite->count == 0) {
+		/* Release guest AIBV and AISB */
+		vpage = phys_to_page(kzdev->fib.fmt0.aibv);
+		if (gaite->aisb != 0)
+			spage = phys_to_page(gaite->aisb);
+		/* Clear the GAIT entry */
+		gaite->aisb = 0;
+		gaite->gisc = 0;
+		gaite->aisbo = 0;
+		gaite->gisa = 0;
+		aift->kzdev[zdev->aisb] = NULL;
+		/* Clear zdev info */
+		airq_iv_free_bit(aift->sbv, zdev->aisb);
+		airq_iv_release(zdev->aibv);
+		zdev->aisb = 0;
+		zdev->aibv = NULL;
+	}
+	spin_unlock_irq(&aift->gait_lock);
+	kvm_s390_gisc_unregister(kzdev->kvm, isc);
+	kzdev->fib.fmt0.isc = 0;
+	kzdev->fib.fmt0.aibv = 0;	/* marks forwarding as torn down */
+
+	if (vpage) {
+		unpin_user_page(vpage);
+		pcount++;
+	}
+	if (spage) {
+		unpin_user_page(spage);
+		pcount++;
+	}
+	if (pcount > 0)
+		unaccount_mem(pcount);	/* one page accounted per page unpinned above */
+out:
+	mutex_unlock(&aift->aift_lock);
+
+	return rc;	/* result of kvm_zpci_clear_airq(), also when forced */
+}
+
+/* Allocate a zeroed kvm_zdev and cross-link it with @zdev. */
+static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
+{
+	struct kvm_zdev *new_kzdev = kzalloc(sizeof(*new_kzdev), GFP_KERNEL);
+
+	if (new_kzdev == NULL)
+		return -ENOMEM;
+
+	/* Link both directions: kzdev -> zdev and zdev -> kzdev */
+	new_kzdev->zdev = zdev;
+	zdev->kzdev = new_kzdev;
+	return 0;
+}
+
+/* Unlink the kvm_zdev attached to @zdev and free it. */
+static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
+{
+	struct kvm_zdev *doomed = zdev->kzdev;
+
+	WARN_ON(doomed->zdev != zdev);	/* back-pointer must match */
+	zdev->kzdev = NULL;
+	kfree(doomed);
+}
+
+
+/*
+ * Register device with the specified KVM (a reference on it is held until
+ * unregister). If interpretation facilities are available, enable them and let
+ * userspace indicate whether or not they will be used (SHM bit to disable).
+ */
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
+{
+	struct zpci_dev *zdev = opaque;
+	int rc;
+
+	if (!zdev)
+		return -EINVAL;
+
+	mutex_lock(&zdev->kzdev_lock);
+
+	if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
+		mutex_unlock(&zdev->kzdev_lock);
+		return -EINVAL;	/* already registered, or no KVM given */
+	}
+
+	kvm_get_kvm(kvm);	/* put again on the error path below */
+
+	mutex_lock(&kvm->lock);
+
+	rc = kvm_s390_pci_dev_open(zdev);
+	if (rc)
+		goto err;
+
+	/*
+	 * If interpretation facilities aren't available, add the device to
+	 * the kzdev list but don't enable for interpretation.
+	 */
+	if (!kvm_s390_pci_interp_allowed())
+		goto out;
+
+	/*
+	 * If this is the first request to use an interpreted device, make the
+	 * necessary vcpu changes
+	 */
+	if (!kvm->arch.use_zpci_interp)
+		kvm_s390_vcpu_pci_enable_interp(kvm);
+
+	if (zdev_enabled(zdev)) {
+		rc = zpci_disable_device(zdev);
+		if (rc)
+			goto err;
+	}
+
+	/*
+	 * Store information about the identity of the kvm guest allowed to
+	 * access this device via interpretation to be used by host CLP
+	 */
+	zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+
+	rc = zpci_reenable_device(zdev);
+	if (rc)
+		goto clear_gisa;
+
+out:
+	zdev->kzdev->kvm = kvm;
+
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+	return 0;	/* the kvm reference stays held on success */
+
+clear_gisa:
+	zdev->gisa = 0;
+err:
+	if (zdev->kzdev)
+		kvm_s390_pci_dev_release(zdev);
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+	kvm_put_kvm(kvm);
+	return rc;
+}
+
+static void kvm_s390_pci_unregister_kvm(void *opaque)
+{
+	struct zpci_dev *zdev = opaque;
+	struct kvm *kvm;
+
+	if (!zdev)
+		return;
+
+	mutex_lock(&zdev->kzdev_lock);
+
+	if (WARN_ON(!zdev->kzdev)) {
+		mutex_unlock(&zdev->kzdev_lock);
+		return;	/* nothing was registered for this device */
+	}
+
+	kvm = zdev->kzdev->kvm;
+	mutex_lock(&kvm->lock);
+
+	/*
+	 * A 0 gisa means interpretation was never enabled, just remove the
+	 * device from the list.
+	 */
+	if (zdev->gisa == 0)
+		goto out;
+
+	/* Forwarding must be turned off before interpretation */
+	if (zdev->kzdev->fib.fmt0.aibv != 0)
+		kvm_s390_pci_aif_disable(zdev, true);	/* forced, result not checked */
+
+	/* Remove the host CLP guest designation */
+	zdev->gisa = 0;
+
+	if (zdev_enabled(zdev)) {
+		if (zpci_disable_device(zdev))
+			goto out;
+	}
+
+	zpci_reenable_device(zdev);	/* NOTE(review): return value is not checked */
+
+out:
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_del(&zdev->kzdev->entry);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+	kvm_s390_pci_dev_release(zdev);
+
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+
+	kvm_put_kvm(kvm);	/* pairs with kvm_get_kvm() in kvm_s390_pci_register_kvm() */
+}
+
+void kvm_s390_pci_init_list(struct kvm *kvm)
+{
+	INIT_LIST_HEAD(&kvm->arch.kzdev_list);		/* starts out empty */
+	spin_lock_init(&kvm->arch.kzdev_list_lock);	/* taken around list accesses */
+}
+
+void kvm_s390_pci_clear_list(struct kvm *kvm)
+{
+	bool leftover;
+
+	/* vfio device closures or kvm fd cleanup should have emptied the list */
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	leftover = !list_empty(&kvm->arch.kzdev_list);
+	WARN_ON_ONCE(leftover);
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+}
+
+static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
+{
+	struct zpci_dev *found = NULL;
+	struct kvm_zdev *cur;
+
+	/* Walk the per-VM device list for the first matching function handle */
+	spin_lock(&kvm->arch.kzdev_list_lock);
+	list_for_each_entry(cur, &kvm->arch.kzdev_list, entry) {
+		if (cur->zdev->fh != fh)
+			continue;
+		found = cur->zdev;
+		break;
+	}
+	spin_unlock(&kvm->arch.kzdev_list_lock);
+	return found;
+}
+
+/* Build a FIB from the REG_AEN arguments in @args and enable forwarding. */
+static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
+				     struct kvm_s390_zpci_op *args)
+{
+	bool have_sb = args->u.reg_aen.sb != 0;
+	struct zpci_fib fib = {};
+	bool assist;
+
+	fib.fmt0.aibv = args->u.reg_aen.ibv;
+	fib.fmt0.isc = args->u.reg_aen.isc;
+	fib.fmt0.noi = args->u.reg_aen.noi;
+
+	/* Summary bit fields are only taken over if a summary address is given */
+	fib.fmt0.aisb = have_sb ? args->u.reg_aen.sb : 0;
+	fib.fmt0.aisbo = have_sb ? args->u.reg_aen.sbo : 0;
+	fib.fmt0.sum = have_sb ? 1 : 0;
+
+	/* Assist is requested unless the HOST flag is set */
+	assist = (args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST) == 0;
+
+	return kvm_s390_pci_aif_enable(zdev, &fib, assist);
+}
+
+/*
+ * Carry out the zPCI operation in @args on the device with function handle args->fh.
+ */
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
+{
+	struct zpci_dev *zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
+	struct kvm_zdev *kzdev;
+	int r = -EINVAL;	/* result for unknown ops and unknown flags */
+
+	if (!zdev)
+		return -ENODEV;
+
+	mutex_lock(&zdev->kzdev_lock);
+	mutex_lock(&kvm->lock);
+
+	/* The device must still have a kzdev, and it must belong to this VM */
+	kzdev = zdev->kzdev;
+	if (!kzdev) {
+		r = -ENODEV;
+		goto out;
+	}
+	if (kzdev->kvm != kvm) {
+		r = -EPERM;
+		goto out;
+	}
+
+	switch (args->op) {
+	case KVM_S390_ZPCIOP_REG_AEN:
+		/* Fail on unknown flags */
+		if (!(args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST))
+			r = kvm_s390_pci_zpci_reg_aen(zdev, args);
+		break;
+	case KVM_S390_ZPCIOP_DEREG_AEN:
+		r = kvm_s390_pci_aif_disable(zdev, false);
+		break;
+	default:
+		break;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+	mutex_unlock(&zdev->kzdev_lock);
+	return r;
+}
+
+int __init kvm_s390_pci_init(void)
+{
+	/* The hooks are installed whether or not interpretation is usable */
+	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
+
+	if (kvm_s390_pci_interp_allowed()) {
+		aift = kzalloc(sizeof(*aift), GFP_KERNEL);
+		if (!aift)
+			return -ENOMEM;
+
+		spin_lock_init(&aift->gait_lock);
+		mutex_init(&aift->aift_lock);
+	}
+
+	return 0;
+}
+
+void kvm_s390_pci_exit(void)
+{
+	/* Remove the hooks first, then release the AIFT */
+	zpci_kvm_hook.kvm_register = NULL;
+	zpci_kvm_hook.kvm_unregister = NULL;
+
+	if (kvm_s390_pci_interp_allowed()) {
+		/* kvm_s390_pci_init() allocates it under the same condition */
+		mutex_destroy(&aift->aift_lock);
+		kfree(aift);
+	}
+}
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
new file mode 100644
index 000000000000..ff0972dd5e71
--- /dev/null
+++ b/arch/s390/kvm/pci.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ *    Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef __KVM_S390_PCI_H
+#define __KVM_S390_PCI_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <asm/airq.h>
+#include <asm/cpu.h>
+
+struct kvm_zdev {
+	struct zpci_dev *zdev;		/* back-pointer to the zPCI device */
+	struct kvm *kvm;		/* VM the device is registered with */
+	struct zpci_fib fib;		/* guest fib values saved by aif_enable */
+	struct list_head entry;		/* link in kvm->arch.kzdev_list */
+};
+
+struct zpci_gaite {
+	u32 gisa;	/* physical address of the guest's GISA, 0 if no assist */
+	u8 gisc;	/* ISC taken from the guest FIB (fmt0.isc) */
+	u8 count;	/* incremented on enable, decremented on disable */
+	u8 reserved;
+	u8 aisbo;	/* AISB offset taken from the guest FIB (fmt0.aisbo) */
+	u64 aisb;	/* physical address derived from the pinned guest AISB page */
+};
+
+struct zpci_aift {
+	struct zpci_gaite *gait;	/* table whose address is stored in aipb.gait */
+	struct airq_iv *sbv;		/* summary bit vector (aipb.faisb) */
+	struct kvm_zdev **kzdev;	/* ZPCI_NR_DEVICES slots, indexed by zdev->aisb */
+	spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
+	struct mutex aift_lock; /* Protects the other structures in aift */
+};
+
+extern struct zpci_aift *aift;
+
+/* Return the VM recorded for summary bit @si, or NULL if there is none. */
+static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
+						 unsigned long si)
+{
+	if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || !aift->kzdev || !aift->kzdev[si])
+		return NULL;
+	return aift->kzdev[si]->kvm;
+}
+
+int kvm_s390_pci_aen_init(u8 nisc);
+void kvm_s390_pci_aen_exit(void);
+
+void kvm_s390_pci_init_list(struct kvm *kvm);
+void kvm_s390_pci_clear_list(struct kvm *kvm);
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
+
+int __init kvm_s390_pci_init(void);
+void kvm_s390_pci_exit(void);
+
+static inline bool kvm_s390_pci_interp_allowed(void)
+{
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	/* No SHM on certain machines */
+	case 0x2817:
+	case 0x2818:
+	case 0x2827:
+	case 0x2828:
+	case 0x2964:
+	case 0x2965:
+		return false;
+	default:
+		break;
+	}
+	return IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) && sclp.has_zpci_lsi &&
+	       sclp.has_aeni && sclp.has_aisi && sclp.has_aisii;
+}
+
+#endif /* __KVM_S390_PCI_H */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 83bb5cf97282..9253c70897a8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -13,7 +13,7 @@
 #include <linux/errno.h>
 #include <linux/mm_types.h>
 #include <linux/pgtable.h>
-
+#include <linux/io.h>
 #include <asm/asm-offsets.h>
 #include <asm/facility.h>
 #include <asm/current.h>
@@ -22,7 +22,6 @@
 #include <asm/sysinfo.h>
 #include <asm/page-states.h>
 #include <asm/gmap.h>
-#include <asm/io.h>
 #include <asm/ptrace.h>
 #include <asm/sclp.h>
 #include <asm/ap.h>
@@ -58,7 +57,7 @@ static int handle_gs(struct kvm_vcpu *vcpu)
 	if (test_kvm_facility(vcpu->kvm, 133)) {
 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
 		preempt_disable();
-		__ctl_set_bit(2, 4);
+		local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
 		current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
 		restore_gs_cb(current->thread.gs_cb);
 		preempt_enable();
@@ -150,7 +149,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
 	 * first page, since address is 8k aligned and memory pieces are always
 	 * at least 1MB aligned and have at least a size of 1MB.
 	 */
-	if (kvm_is_error_gpa(vcpu->kvm, address))
+	if (!kvm_is_gpa_in_memslot(vcpu->kvm, address))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 	kvm_s390_set_prefix(vcpu, address);
@@ -442,7 +441,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_ipte_interlock++;
 	if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu->kvm));
 	kvm_s390_retry_instr(vcpu);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
 	return 0;
@@ -465,7 +464,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 	addr = kvm_s390_real_to_abs(vcpu, addr);
 
-	if (kvm_is_error_gpa(vcpu->kvm, addr))
+	if (!kvm_is_gpa_in_memslot(vcpu->kvm, addr))
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	/*
 	 * We don't expect errors on modern systems, and do not care
@@ -677,8 +676,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
 	if (vcpu->kvm->arch.crypto.pqap_hook) {
 		pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook;
 		ret = pqap_hook(vcpu);
-		if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
-			kvm_s390_set_psw_cc(vcpu, 3);
+		if (!ret) {
+			if (vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+				kvm_s390_set_psw_cc(vcpu, 3);
+			else
+				kvm_s390_set_psw_cc(vcpu, 0);
+		}
 		up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem);
 		return ret;
 	}
@@ -794,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int handle_lpswey(struct kvm_vcpu *vcpu)
+{
+	u64 psw_addr;
+	psw_t loaded;
+	int ret;
+	u8 ar;
+
+	vcpu->stat.instruction_lpswey++;
+
+	if (!test_kvm_facility(vcpu->kvm, 193))
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	/* The operand address must be a multiple of 8 */
+	psw_addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+	if ((psw_addr & 7) != 0)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	ret = read_guest(vcpu, psw_addr, ar, &loaded, sizeof(loaded));
+	if (ret != 0)
+		return kvm_s390_inject_prog_cond(vcpu, ret);
+
+	/* Install the PSW first; validity is checked on the installed copy */
+	vcpu->arch.sie_block->gpsw = loaded;
+	if (is_valid_psw(&vcpu->arch.sie_block->gpsw))
+		return 0;
+	return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+}
+
 static int handle_stidp(struct kvm_vcpu *vcpu)
 {
 	u64 stidp_data = vcpu->kvm->arch.model.cpuid;
@@ -873,10 +906,18 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	if (fc > 3) {
-		kvm_s390_set_psw_cc(vcpu, 3);
-		return 0;
-	}
+	/* Bailout forbidden function codes */
+	if (fc > 3 && fc != 15)
+		goto out_no_data;
+
+	/*
+	 * fc 15 is provided only with
+	 *   - PTF/CPU topology support through facility 11
+	 *   - KVM_CAP_S390_USER_STSI
+	 */
+	if (fc == 15 && (!test_kvm_facility(vcpu->kvm, 11) ||
+			 !vcpu->kvm->arch.user_stsi))
+		goto out_no_data;
 
 	if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
 	    || vcpu->run->s.regs.gprs[1] & 0xffff0000)
@@ -910,10 +951,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 			goto out_no_data;
 		handle_stsi_3_2_2(vcpu, (void *) mem);
 		break;
+	case 15: /* fc 15 is fully handled in userspace */
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+		return -EREMOTE;
 	}
 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
-		memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
-		       PAGE_SIZE);
+		memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE);
 		rc = 0;
 	} else {
 		rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
@@ -1204,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
 
 static int handle_essa(struct kvm_vcpu *vcpu)
 {
+	lockdep_assert_held(&vcpu->kvm->srcu);
+
 	/* entries expected to be 1FF */
 	int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
 	unsigned long *cbrlo;
@@ -1253,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		/* Retry the ESSA instruction */
 		kvm_s390_retry_instr(vcpu);
 	} else {
-		int srcu_idx;
-
 		mmap_read_lock(vcpu->kvm->mm);
-		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		i = __do_essa(vcpu, orc);
-		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		mmap_read_unlock(vcpu->kvm->mm);
 		if (i < 0)
 			return i;
@@ -1448,6 +1490,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
 	case 0x61:
 	case 0x62:
 		return handle_ri(vcpu);
+	case 0x71:
+		return handle_lpswey(vcpu);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1471,7 +1515,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 	access_key = (operand2 & 0xf0) >> 4;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
-		ipte_lock(vcpu);
+		ipte_lock(vcpu->kvm);
 
 	ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
 					       GACC_STORE, access_key);
@@ -1508,7 +1552,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 	}
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
-		ipte_unlock(vcpu);
+		ipte_unlock(vcpu->kvm);
 	return ret;
 }
 
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index cc7c9599f43e..14c330ec8ceb 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -7,13 +7,120 @@
  */
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/minmax.h>
 #include <linux/pagemap.h>
 #include <linux/sched/signal.h>
 #include <asm/gmap.h>
 #include <asm/uv.h>
 #include <asm/mman.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->lock);
+	return !!kvm_s390_pv_get_handle(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
+
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_held(&vcpu->mutex);
+	return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
+
+/**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+	unsigned long vmaddr;
+
+	lockdep_assert_held(&kvm->srcu);
+
+	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+	if (kvm_is_error_hva(vmaddr))
+		return -EFAULT;
+	return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+	struct uv_cb_cts uvcb = {
+		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+		.gaddr = gaddr,
+	};
+
+	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+	struct page *page;
+	int rc = 0;
+
+	mmap_read_lock(kvm->mm);
+	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+	if (page)
+		rc = __kvm_s390_pv_destroy_page(page);
+	kvm_release_page_clean(page);
+	mmap_read_unlock(kvm->mm);
+	return rc;
+}
+
+/**
+ * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
+ * be destroyed
+ *
+ * @list: list head for the list of leftover VMs
+ * @old_gmap_table: the gmap table of the leftover protected VM
+ * @handle: the handle of the leftover protected VM
+ * @stor_var: pointer to the variable storage of the leftover protected VM
+ * @stor_base: address of the base storage of the leftover protected VM
+ *
+ * Represents a protected VM that is still registered with the Ultravisor,
+ * but which does not correspond any longer to an active KVM VM. It should
+ * be destroyed at some point later, either asynchronously or when the
+ * process terminates.
+ */
+struct pv_vm_to_be_destroyed {
+	struct list_head list;
+	unsigned long old_gmap_table;
+	u64 handle;
+	void *stor_var;
+	unsigned long stor_base;
+};
+
+static void kvm_s390_clear_pv_state(struct kvm *kvm)
+{
+	kvm->arch.pv.handle = 0;
+	kvm->arch.pv.guest_len = 0;
+	kvm->arch.pv.stor_base = 0;
+	kvm->arch.pv.stor_var = NULL;
+}
+
 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 {
 	int cc;
@@ -32,7 +139,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 		free_pages(vcpu->arch.pv.stor_base,
 			   get_order(uv_info.guest_cpu_stor_len));
 
-	free_page(sida_origin(vcpu->arch.sie_block));
+	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
 	vcpu->arch.sie_block->pv_handle_cpu = 0;
 	vcpu->arch.sie_block->pv_handle_config = 0;
 	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
@@ -54,6 +161,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
 		.header.len = sizeof(uvcb),
 	};
+	void *sida_addr;
 	int cc;
 
 	if (kvm_s390_pv_cpu_get_handle(vcpu))
@@ -67,16 +175,17 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 	/* Input */
 	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
 	uvcb.num = vcpu->arch.sie_block->icpua;
-	uvcb.state_origin = (u64)vcpu->arch.sie_block;
-	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
+	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);
 
 	/* Alloc Secure Instruction Data Area Designation */
-	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-	if (!vcpu->arch.sie_block->sidad) {
+	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!sida_addr) {
 		free_pages(vcpu->arch.pv.stor_base,
 			   get_order(uv_info.guest_cpu_stor_len));
 		return -ENOMEM;
 	}
+	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);
 
 	cc = uv_call(0, (u64)&uvcb);
 	*rc = uvcb.header.rc;
@@ -108,7 +217,7 @@ static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
 	vfree(kvm->arch.pv.stor_var);
 	free_pages(kvm->arch.pv.stor_base,
 		   get_order(uv_info.guest_base_stor_len));
-	memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+	kvm_s390_clear_pv_state(kvm);
 }
 
 static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
@@ -147,26 +256,363 @@ out_err:
 	return -ENOMEM;
 }
 
-/* this should not fail, but if it does, we must not free the donated memory */
-int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+/**
+ * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
+ * @kvm: the KVM that was associated with this leftover protected VM
+ * @leftover: details about the leftover protected VM that needs a clean up
+ * @rc: the RC code of the Destroy Secure Configuration UVC
+ * @rrc: the RRC code of the Destroy Secure Configuration UVC
+ *
+ * Destroy one leftover protected VM.
+ * On success, kvm->mm->context.protected_count will be decremented atomically
+ * and all other resources used by the VM will be freed.
+ *
+ * Return: 0 in case of success, otherwise the non-zero result of the UVC
+ */
+static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
+					    struct pv_vm_to_be_destroyed *leftover,
+					    u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	/* It used the destroy-fast UVC, nothing left to do here */
+	if (!leftover->handle)
+		goto done_fast;
+	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
+	if (cc)
+		return cc;
+	/*
+	 * Intentionally leak unusable memory. If the UVC fails, the memory
+	 * used for the VM and its metadata is permanently unusable.
+	 * This can only happen in case of a serious KVM or hardware bug; it
+	 * is not expected to happen in normal operation.
+	 */
+	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
+	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
+	vfree(leftover->stor_var);
+done_fast:
+	atomic_dec(&kvm->mm->context.protected_count);
+	return 0;
+}
+
+/**
+ * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
+ * @kvm: the VM whose memory is to be cleared.
+ *
+ * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ */
+static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
+{
+	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
+	struct kvm_memory_slot *slot;
+	unsigned long len;
+	int srcu_idx;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	/* Take the memslot containing guest absolute address 0 */
+	slot = gfn_to_memslot(kvm, 0);
+	/* Clear all slots or parts thereof that are below 2GB */
+	while (slot && slot->base_gfn < pages_2g) {
+		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
+		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
+		/* Take the next memslot */
+		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
+	}
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
+	struct uv_cb_destroy_fast uvcb = {
+		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
+		.header.len = sizeof(uvcb),
+		.handle = kvm_s390_pv_get_handle(kvm),
+	};
 	int cc;
 
-	/* make all pages accessible before destroying the guest */
-	s390_reset_acc(kvm->mm);
+	cc = uv_call_sched(0, (u64)&uvcb);
+	if (rc)
+		*rc = uvcb.header.rc;
+	if (rrc)
+		*rrc = uvcb.header.rrc;
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
+		     uvcb.header.rc, uvcb.header.rrc);
+	WARN_ONCE(cc && uvcb.header.rc != 0x104,
+		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
+	/* Intended memory leak on "impossible" error */
+	if (!cc)
+		kvm_s390_pv_dealloc_vm(kvm);
+	return cc ? -EIO : 0;
+}
+
+static inline bool is_destroy_fast_available(void)
+{
+	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
+}
+
+/**
+ * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
+ * @kvm: the VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Set aside the protected VM for a subsequent teardown. The VM will be able
+ * to continue immediately as a non-secure VM, and the information needed to
+ * properly tear down the protected VM is set aside. If another protected VM
+ * was already set aside without starting its teardown, this function will
+ * fail.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 on success, -EINVAL if another protected VM was already set aside or
+ * the guest has a segment type ASCE, -ENOMEM if out of memory, -EIO on UVC failure.
+ */
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *priv;
+	int res = 0;
+
+	lockdep_assert_held(&kvm->lock);
+	/*
+	 * If another protected VM was already prepared for teardown, refuse.
+	 * A normal deinitialization has to be performed instead.
+	 */
+	if (kvm->arch.pv.set_aside)
+		return -EINVAL;
+
+	/* Guest with segment type ASCE, refuse to destroy asynchronously */
+	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+		return -EINVAL;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	if (is_destroy_fast_available()) {
+		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
+	} else {
+		priv->stor_var = kvm->arch.pv.stor_var;
+		priv->stor_base = kvm->arch.pv.stor_base;
+		priv->handle = kvm_s390_pv_get_handle(kvm);
+		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
+		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+		if (s390_replace_asce(kvm->arch.gmap))
+			res = -ENOMEM;
+	}
+
+	if (res) {
+		kfree(priv);
+		return res;
+	}
+
+	kvm_s390_destroy_lower_2g(kvm);
+	kvm_s390_clear_pv_state(kvm);
+	kvm->arch.pv.set_aside = priv;
+
+	*rc = UVC_RC_EXECUTED;
+	*rrc = 42;
+	return 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
+ * @kvm: the KVM whose protected VM needs to be deinitialized
+ * @rc: the RC code of the UVC
+ * @rrc: the RRC code of the UVC
+ *
+ * Deinitialize the current protected VM. This function will destroy and
+ * cleanup the current protected VM, but it will not cleanup the guest
+ * memory. This function should only be called when the protected VM has
+ * just been created and therefore does not have any guest memory, or when
+ * the caller cleans up the guest memory separately.
+ *
+ * This function should not fail, but if it does, the donated memory must
+ * not be freed.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int cc;
 
 	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
 			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
 	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
-	atomic_set(&kvm->mm->context.is_protected, 0);
+	if (!cc) {
+		atomic_dec(&kvm->mm->context.protected_count);
+		kvm_s390_pv_dealloc_vm(kvm);
+	} else {
+		/* Intended memory leak on "impossible" error */
+		s390_replace_asce(kvm->arch.gmap);
+	}
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
 	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
-	/* Inteded memory leak on "impossible" error */
-	if (!cc)
-		kvm_s390_pv_dealloc_vm(kvm);
+
+	return cc ? -EIO : 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
+ * with a specific KVM.
+ * @kvm: the KVM to be cleaned up
+ * @rc: the RC code of the first failing UVC
+ * @rrc: the RRC code of the first failing UVC
+ *
+ * This function will clean up all protected VMs associated with a KVM.
+ * This includes the active one, the one prepared for deinitialization with
+ * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
+ *
+ * Context: kvm->lock needs to be held unless being called from
+ * kvm_arch_destroy_vm.
+ *
+ * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *cur;
+	bool need_zap = false;
+	u16 _rc, _rrc;
+	int cc = 0;
+
+	/*
+	 * Nothing to do if the counter was already 0. Otherwise make sure
+	 * the counter does not reach 0 before calling s390_uv_destroy_range.
+	 */
+	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
+		return 0;
+
+	*rc = 1;
+	/* If the current VM is protected, destroy it */
+	if (kvm_s390_pv_get_handle(kvm)) {
+		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
+		need_zap = true;
+	}
+
+	/* If a previous protected VM was set aside, put it in the need_cleanup list */
+	if (kvm->arch.pv.set_aside) {
+		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
+		kvm->arch.pv.set_aside = NULL;
+	}
+
+	/* Cleanup all protected VMs in the need_cleanup list */
+	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
+		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
+		need_zap = true;
+		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
+			cc = 1;
+			/*
+			 * Only return the first error rc and rrc, so make
+			 * sure it is not overwritten. All destroys will
+			 * additionally be reported via KVM_UV_EVENT().
+			 */
+			if (*rc == UVC_RC_EXECUTED) {
+				*rc = _rc;
+				*rrc = _rrc;
+			}
+		}
+		list_del(&cur->list);
+		kfree(cur);
+	}
+
+	/*
+	 * If the mm still has a mapping, try to mark all its pages as
+	 * accessible. The counter should not reach zero before this
+	 * cleanup has been performed.
+	 */
+	if (need_zap && mmget_not_zero(kvm->mm)) {
+		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+		mmput(kvm->mm);
+	}
+
+	/* Now the counter can safely reach 0 */
+	atomic_dec(&kvm->mm->context.protected_count);
 	return cc ? -EIO : 0;
 }
 
+/**
+ * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
+ * @kvm: the VM previously associated with the protected VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Tear down the protected VM that had been previously prepared for teardown
+ * using kvm_s390_pv_set_aside(). Ideally this should be called by
+ * userspace asynchronously from a separate thread.
+ *
+ * Context: kvm->lock must not be held.
+ *
+ * Return: 0 in case of success, -EINVAL if no protected VM had been
+ * prepared for asynchronous teardown, -EIO in case of other errors.
+ */
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *p;
+	int ret = 0;
+
+	lockdep_assert_not_held(&kvm->lock);
+	mutex_lock(&kvm->lock);
+	p = kvm->arch.pv.set_aside;
+	kvm->arch.pv.set_aside = NULL;
+	mutex_unlock(&kvm->lock);
+	if (!p)
+		return -EINVAL;
+
+	/* When a fatal signal is received, stop immediately */
+	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
+		goto done;
+	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
+		ret = -EIO;
+	kfree(p);
+	p = NULL;
+done:
+	/*
+	 * p is not NULL if we aborted because of a fatal signal, in which
+	 * case queue the leftover for later cleanup.
+	 */
+	if (p) {
+		mutex_lock(&kvm->lock);
+		list_add(&p->list, &kvm->arch.pv.need_cleanup);
+		mutex_unlock(&kvm->lock);
+		/* Did not finish, but pretend things went well */
+		*rc = UVC_RC_EXECUTED;
+		*rrc = 42;
+	}
+	return ret;
+}
+
+static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
+					     struct mm_struct *mm)
+{
+	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
+	u16 dummy;
+	int r;
+
+	/*
+	 * No locking is needed since this is the last thread of the last user of this
+	 * struct mm.
+	 * When the struct kvm gets deinitialized, this notifier is also
+	 * unregistered. This means that if this notifier runs, then the
+	 * struct kvm is still valid.
+	 */
+	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
+		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
+}
+
+static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
+	.release = kvm_s390_pv_mmu_notifier_release,
+};
+
 int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
 	struct uv_cb_cgc uvcb = {
@@ -184,27 +630,38 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
 	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
 	uvcb.guest_asce = kvm->arch.gmap->asce;
-	uvcb.guest_sca = (unsigned long)kvm->arch.sca;
-	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
+	uvcb.conf_base_stor_origin =
+		virt_to_phys((void *)kvm->arch.pv.stor_base);
 	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
 
 	cc = uv_call_sched(0, (u64)&uvcb);
 	*rc = uvcb.header.rc;
 	*rrc = uvcb.header.rrc;
-	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
-		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
 
 	/* Outputs */
 	kvm->arch.pv.handle = uvcb.guest_handle;
 
+	atomic_inc(&kvm->mm->context.protected_count);
 	if (cc) {
-		if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
-		else
+		} else {
+			atomic_dec(&kvm->mm->context.protected_count);
 			kvm_s390_pv_dealloc_vm(kvm);
+		}
 		return -EIO;
 	}
 	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+	/* Add the notifier only once. No races because we hold kvm->lock */
+	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+	}
 	return 0;
 }
 
@@ -224,8 +681,6 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
 	*rrc = uvcb.header.rrc;
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
 		     *rc, *rrc);
-	if (!cc)
-		atomic_set(&kvm->mm->context.is_protected, 1);
 	return cc ? -EINVAL : 0;
 }
 
@@ -240,11 +695,29 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
 		.tweak[0] = tweak,
 		.tweak[1] = offset,
 	};
-	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
+	unsigned long vmaddr;
+	bool unlocked;
 
 	*rc = uvcb.header.rc;
 	*rrc = uvcb.header.rrc;
 
+	if (ret == -ENXIO) {
+		mmap_read_lock(kvm->mm);
+		vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
+		if (kvm_is_error_hva(vmaddr)) {
+			ret = -EFAULT;
+		} else {
+			ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+			if (!ret)
+				ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
+		}
+		mmap_read_unlock(kvm->mm);
+		if (!ret)
+			return -EAGAIN;
+		return ret;
+	}
+
 	if (ret && ret != -EAGAIN)
 		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
 			     uvcb.gaddr, *rc, *rrc);
@@ -263,6 +736,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
 		     addr, size);
 
+	guard(srcu)(&kvm->srcu);
+
 	while (offset < size) {
 		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
 		if (ret == -EAGAIN) {
@@ -298,3 +773,200 @@ int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
 		return -EINVAL;
 	return 0;
 }
+
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_dump_cpu uvcb = {
+		.header.cmd = UVC_CMD_DUMP_CPU,
+		.header.len = sizeof(uvcb),
+		.cpu_handle = vcpu->arch.pv.handle,
+		.dump_area_origin = (u64)buff,
+	};
+	int cc;
+
+	cc = uv_call_sched(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	return cc;
+}
+
+/* Size of the cache for the storage state dump data. 1MB for now */
+#define DUMP_BUFF_LEN HPAGE_SIZE
+
+/**
+ * kvm_s390_pv_dump_stor_state
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @gaddr: Starting absolute guest address for which the storage state
+ *	   is requested.
+ * @buff_user_len: Length of the buff_user buffer
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Stores buff_user_len bytes of tweak component values to buff_user
+ * starting with the 1MB block specified by the absolute guest address
+ * (gaddr). On return, the value at gaddr is advanced past the last 1MB
+ * block for which the UV call succeeded. buff_user
+ * might be written to even if an error rc is returned. For instance
+ * if we encounter a fault after writing the first page of data.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ *  0 on success
+ *  -ENOMEM if allocating the cache fails
+ *  -EINVAL if gaddr is not aligned to 1MB
+ *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
+ *  -EINVAL if the UV call fails, rc and rrc will be set in this case
+ *  -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_dump_stor_state uvcb = {
+		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
+		.header.len = sizeof(uvcb),
+		.config_handle = kvm->arch.pv.handle,
+		.gaddr = *gaddr,
+		.dump_area_origin = 0,
+	};
+	const u64 increment_len = uv_info.conf_dump_storage_state_len;
+	size_t buff_kvm_size;
+	size_t size_done = 0;
+	u8 *buff_kvm = NULL;
+	int cc, ret;
+
+	ret = -EINVAL;
+	/* UV call processes 1MB guest storage chunks at a time */
+	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
+		goto out;
+
+	/*
+	 * We provide the storage state for 1MB chunks of guest
+	 * storage. The buffer will need to be aligned to
+	 * conf_dump_storage_state_len so we don't end on a partial
+	 * chunk.
+	 */
+	if (!buff_user_len ||
+	    !IS_ALIGNED(buff_user_len, increment_len))
+		goto out;
+
+	/*
+	 * Allocate a buffer from which we will later copy to the user
+	 * process. We don't want userspace to dictate our buffer size
+	 * so we limit it to DUMP_BUFF_LEN.
+	 */
+	ret = -ENOMEM;
+	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
+	buff_kvm = vzalloc(buff_kvm_size);
+	if (!buff_kvm)
+		goto out;
+
+	ret = 0;
+	uvcb.dump_area_origin = (u64)buff_kvm;
+	/* We will loop until the user buffer is filled or an error occurs */
+	do {
+		/* Get 1MB worth of guest storage state data */
+		cc = uv_call_sched(0, (u64)&uvcb);
+
+		/* All or nothing */
+		if (cc) {
+			ret = -EINVAL;
+			break;
+		}
+
+		size_done += increment_len;
+		uvcb.dump_area_origin += increment_len;
+		buff_user_len -= increment_len;
+		uvcb.gaddr += HPAGE_SIZE;
+
+		/* KVM Buffer full, time to copy to the process */
+		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
+			if (copy_to_user(buff_user, buff_kvm, size_done)) {
+				ret = -EFAULT;
+				break;
+			}
+
+			buff_user += size_done;
+			size_done = 0;
+			uvcb.dump_area_origin = (u64)buff_kvm;
+		}
+	} while (buff_user_len);
+
+	/* Report back where we ended dumping */
+	*gaddr = uvcb.gaddr;
+
+	/* Lets only log errors, we don't want to spam */
+out:
+	if (ret)
+		KVM_UV_EVENT(kvm, 3,
+			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
+			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	vfree(buff_kvm);
+
+	return ret;
+}
+
+/**
+ * kvm_s390_pv_dump_complete
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Completes the dumping operation and writes the completion data to
+ * user space.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ *  0 on success
+ *  -ENOMEM if allocating the completion buffer fails
+ *  -EINVAL if the UV call fails, rc and rrc will be set in this case
+ *  -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+			      u16 *rc, u16 *rrc)
+{
+	struct uv_cb_dump_complete complete = {
+		.header.len = sizeof(complete),
+		.header.cmd = UVC_CMD_DUMP_COMPLETE,
+		.config_handle = kvm_s390_pv_get_handle(kvm),
+	};
+	u64 *compl_data;
+	int ret;
+
+	/* Allocate dump area */
+	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
+	if (!compl_data)
+		return -ENOMEM;
+	complete.dump_area_origin = (u64)compl_data;
+
+	ret = uv_call_sched(0, (u64)&complete);
+	*rc = complete.header.rc;
+	*rrc = complete.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
+		     complete.header.rc, complete.header.rrc);
+
+	if (!ret) {
+		/*
+		 * kvm_s390_pv_dealloc_vm() will also (mem)set
+		 * this to false on a reboot or other destroy
+		 * operation for this vm.
+		 */
+		kvm->arch.pv.dumping = false;
+		kvm_s390_vcpu_unblock_all(kvm);
+		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
+		if (ret)
+			ret = -EFAULT;
+	}
+	vfree(compl_data);
+	/* If the UVC returned an error, translate it to -EINVAL */
+	if (ret > 0)
+		ret = -EINVAL;
+	return ret;
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 8aaee2892ec3..55c34cb35428 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -172,7 +172,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 	 * first page, since address is 8k aligned and memory pieces are always
 	 * at least 1MB aligned and have at least a size of 1MB.
 	 */
-	if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
+	if (!kvm_is_gpa_in_memslot(vcpu->kvm, irq.u.prefix.address)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INVALID_PARAMETER;
 		return SIGP_CC_STATUS_STORED;
@@ -469,7 +469,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
  *
  * This interception will occur at the source cpu when a source cpu sends an
  * external call to a target cpu and the target cpu has the WAIT bit set in
- * its cpuflags. Interception will occurr after the interrupt indicator bits at
+ * its cpuflags. Interception will occur after the interrupt indicator bits at
  * the target cpu have been set. All error cases will lead to instruction
  * interception, therefore nothing is to be checked or prepared.
  */
@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *dest_vcpu;
 	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 
-	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
-
 	if (order_code == SIGP_EXTERNAL_CALL) {
+		trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
 		dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
 		BUG_ON(dest_vcpu == NULL);
 
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 6f0209d45164..9e28f165c114 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
 		    __entry->sie_block = sie_block;
 		    ),
 
-	    TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+	    TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
 		      __entry->id, __entry->vcpu, __entry->sie_block)
 	);
 
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
 		    __entry->kvm = kvm;
 		    ),
 
-	    TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+	    TP_printk("enabling channel I/O support (kvm @ %p)\n",
 		      __entry->kvm)
 	);
 
@@ -333,6 +333,29 @@ TRACE_EVENT(kvm_s390_airq_suppressed,
 		      __entry->id, __entry->isc)
 	);
 
+/*
+ * Trace point for gmap notifier calls.
+ */
+TRACE_EVENT(kvm_s390_gmap_notifier,
+	    TP_PROTO(unsigned long start, unsigned long end, unsigned int shadow),
+	    TP_ARGS(start, end, shadow),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long, start)
+		    __field(unsigned long, end)
+		    __field(unsigned int, shadow)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->start = start;
+		    __entry->end = end;
+		    __entry->shadow = shadow;
+		    ),
+
+	    TP_printk("gmap notified (start:0x%lx end:0x%lx shadow:%d)",
+		      __entry->start, __entry->end, __entry->shadow)
+	);
+
 
 #endif /* _TRACE_KVMS390_H */
 
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index dada78b92691..13a9661d2b28 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -12,16 +12,22 @@
 #include <linux/list.h>
 #include <linux/bitmap.h>
 #include <linux/sched/signal.h>
+#include <linux/io.h>
+#include <linux/mman.h>
 
 #include <asm/gmap.h>
 #include <asm/mmu_context.h>
 #include <asm/sclp.h>
 #include <asm/nmi.h>
 #include <asm/dis.h>
-#include <asm/fpu/api.h>
+#include <asm/facility.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
+enum vsie_page_flags {
+	VSIE_PAGE_IN_USE = 0,
+};
+
 struct vsie_page {
 	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
 	/*
@@ -45,11 +51,40 @@ struct vsie_page {
 	gpa_t gvrd_gpa;				/* 0x0240 */
 	gpa_t riccbd_gpa;			/* 0x0248 */
 	gpa_t sdnx_gpa;				/* 0x0250 */
-	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
+	/*
+	 * guest address of the original SCB. Remains set for free vsie
+	 * pages, so we can properly look them up in our addr_to_page
+	 * radix tree.
+	 */
+	gpa_t scb_gpa;				/* 0x0258 */
+	/*
+	 * Flags: must be set/cleared atomically after the vsie page can be
+	 * looked up by other CPUs.
+	 */
+	unsigned long flags;			/* 0x0260 */
+	__u8 reserved[0x0700 - 0x0268];		/* 0x0268 */
 	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
 	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
 };
 
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ *                       given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise; the
+ * caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+	if (sg->removed)
+		return 0;
+	return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
 /* trigger a validity icpt for the given scb */
 static int set_validity_icpt(struct kvm_s390_sie_block *scb,
 			     __u16 reason_code)
@@ -138,11 +173,15 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 }
 /* Copy to APCB FORMAT1 from APCB FORMAT0 */
 static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
-			unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+			unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
 {
 	struct kvm_s390_apcb0 tmp;
+	unsigned long apcb_gpa;
+
+	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
 
-	if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+	if (read_guest_real(vcpu, apcb_gpa, &tmp,
+			    sizeof(struct kvm_s390_apcb0)))
 		return -EFAULT;
 
 	apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
@@ -157,19 +196,24 @@ static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
  * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
  * @vcpu: pointer to the virtual CPU
  * @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original apcb in the guest2
+ * @crycb_gpa: guest physical address to start of original guest crycb
  * @apcb_h: pointer to start of apcb in the guest1
  *
  * Returns 0 and -EFAULT on error reading guest apcb
  */
 static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
-			unsigned long apcb_o, unsigned long *apcb_h)
+			unsigned long crycb_gpa, unsigned long *apcb_h)
 {
-	if (read_guest_real(vcpu, apcb_o, apcb_s,
+	unsigned long apcb_gpa;
+
+	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
+
+	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
 			    sizeof(struct kvm_s390_apcb0)))
 		return -EFAULT;
 
-	bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
+	bitmap_and(apcb_s, apcb_s, apcb_h,
+		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
 
 	return 0;
 }
@@ -178,20 +222,25 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
  * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
  * @vcpu: pointer to the virtual CPU
  * @apcb_s: pointer to start of apcb in the shadow crycb
- * @apcb_o: pointer to start of original guest apcb
+ * @crycb_gpa: guest physical address to start of original guest crycb
  * @apcb_h: pointer to start of apcb in the host
  *
  * Returns 0 and -EFAULT on error reading guest apcb
  */
 static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
-			unsigned long apcb_o,
+			unsigned long crycb_gpa,
 			unsigned long *apcb_h)
 {
-	if (read_guest_real(vcpu, apcb_o, apcb_s,
+	unsigned long apcb_gpa;
+
+	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);
+
+	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
 			    sizeof(struct kvm_s390_apcb1)))
 		return -EFAULT;
 
-	bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
+	bitmap_and(apcb_s, apcb_s, apcb_h,
+		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
 
 	return 0;
 }
@@ -200,7 +249,7 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
  * setup_apcb - Create a shadow copy of the apcb.
  * @vcpu: pointer to the virtual CPU
  * @crycb_s: pointer to shadow crycb
- * @crycb_o: pointer to original guest crycb
+ * @crycb_gpa: guest physical address of original guest crycb
  * @crycb_h: pointer to the host crycb
  * @fmt_o: format of the original guest crycb.
  * @fmt_h: format of the host crycb.
@@ -211,50 +260,46 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
  * Return 0 or an error number if the guest and host crycb are incompatible.
  */
 static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
-	       const u32 crycb_o,
+	       const u32 crycb_gpa,
 	       struct kvm_s390_crypto_cb *crycb_h,
 	       int fmt_o, int fmt_h)
 {
-	struct kvm_s390_crypto_cb *crycb;
-
-	crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
-
 	switch (fmt_o) {
 	case CRYCB_FORMAT2:
-		if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
 			return -EACCES;
 		if (fmt_h != CRYCB_FORMAT2)
 			return -EINVAL;
 		return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
-				    (unsigned long) &crycb->apcb1,
+				    crycb_gpa,
 				    (unsigned long *)&crycb_h->apcb1);
 	case CRYCB_FORMAT1:
 		switch (fmt_h) {
 		case CRYCB_FORMAT2:
 			return setup_apcb10(vcpu, &crycb_s->apcb1,
-					    (unsigned long) &crycb->apcb0,
+					    crycb_gpa,
 					    &crycb_h->apcb1);
 		case CRYCB_FORMAT1:
 			return setup_apcb00(vcpu,
 					    (unsigned long *) &crycb_s->apcb0,
-					    (unsigned long) &crycb->apcb0,
+					    crycb_gpa,
 					    (unsigned long *) &crycb_h->apcb0);
 		}
 		break;
 	case CRYCB_FORMAT0:
-		if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
 			return -EACCES;
 
 		switch (fmt_h) {
 		case CRYCB_FORMAT2:
 			return setup_apcb10(vcpu, &crycb_s->apcb1,
-					    (unsigned long) &crycb->apcb0,
+					    crycb_gpa,
 					    &crycb_h->apcb1);
 		case CRYCB_FORMAT1:
 		case CRYCB_FORMAT0:
 			return setup_apcb00(vcpu,
 					    (unsigned long *) &crycb_s->apcb0,
-					    (unsigned long) &crycb->apcb0,
+					    crycb_gpa,
 					    (unsigned long *) &crycb_h->apcb0);
 		}
 	}
@@ -324,7 +369,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	/* we may only allow it if enabled for guest 2 */
 	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
 		     (ECB3_AES | ECB3_DEA);
-	ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
+	ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd &
+		     (ECD_ECC | ECD_HMAC);
 	if (!ecb3_flags && !ecd_flags)
 		goto end;
 
@@ -351,7 +397,7 @@ end:
 	case -EACCES:
 		return set_validity_icpt(scb_s, 0x003CU);
 	}
-	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
+	scb_s->crycbd = (u32)virt_to_phys(&vsie_page->crycb) | CRYCB_FORMAT2;
 	return 0;
 }
 
@@ -494,7 +540,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	scb_s->mso = new_mso;
 	scb_s->prefix = new_prefix;
 
-	/* We have to definetly flush the tlb if this scb never ran */
+	/* We have to definitely flush the tlb if this scb never ran */
 	if (scb_s->ihcpu != 0xffffU)
 		scb_s->ihcpu = scb_o->ihcpu;
 
@@ -503,6 +549,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	/* Host-protection-interruption introduced with ESOP */
 	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
 		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
+	/*
+	 * CPU Topology
+	 * This facility only uses the utility field of the SCA and none of
+	 * the cpu entries that are problematic with the other interpretation
+	 * facilities so we can pass it through
+	 */
+	if (test_kvm_facility(vcpu->kvm, 11))
+		scb_s->ecb |= scb_o->ecb & ECB_PTF;
 	/* transactional execution */
 	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
 		/* remap the prefix is tx is toggled on */
@@ -538,8 +592,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
 		scb_s->eca |= scb_o->eca & ECA_CEI;
 	/* Epoch Extension */
-	if (test_kvm_facility(vcpu->kvm, 139))
+	if (test_kvm_facility(vcpu->kvm, 139)) {
 		scb_s->ecd |= scb_o->ecd & ECD_MEF;
+		scb_s->epdx = scb_o->epdx;
+	}
 
 	/* etoken */
 	if (test_kvm_facility(vcpu->kvm, 156))
@@ -562,24 +618,18 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
 	struct kvm *kvm = gmap->private;
 	struct vsie_page *cur;
 	unsigned long prefix;
-	struct page *page;
 	int i;
 
 	if (!gmap_is_shadow(gmap))
 		return;
-	if (start >= 1UL << 31)
-		/* We are only interested in prefix pages */
-		return;
-
 	/*
 	 * Only new shadow blocks are added to the list during runtime,
 	 * therefore we can safely reference them all the time.
 	 */
 	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
-		page = READ_ONCE(kvm->arch.vsie.pages[i]);
-		if (!page)
+		cur = READ_ONCE(kvm->arch.vsie.pages[i]);
+		if (!cur)
 			continue;
-		cur = page_to_virt(page);
 		if (READ_ONCE(cur->gmap) != gmap)
 			continue;
 		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
@@ -644,16 +694,16 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 	struct page *page;
 
 	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
-	if (is_error_page(page))
+	if (!page)
 		return -EINVAL;
-	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
+	*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
 	return 0;
 }
 
 /* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
 static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
 {
-	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
+	kvm_release_page_dirty(pfn_to_page(hpa >> PAGE_SHIFT));
 	/* mark the page always as dirty for migration */
 	mark_page_dirty(kvm, gpa_to_gfn(gpa));
 }
@@ -836,7 +886,7 @@ unpin:
 static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		      gpa_t gpa)
 {
-	hpa_t hpa = (hpa_t) vsie_page->scb_o;
+	hpa_t hpa = virt_to_phys(vsie_page->scb_o);
 
 	if (hpa)
 		unpin_guest_page(vcpu->kvm, gpa, hpa);
@@ -861,7 +911,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		WARN_ON_ONCE(rc);
 		return 1;
 	}
-	vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	vsie_page->scb_o = phys_to_virt(hpa);
 	return 0;
 }
 
@@ -881,7 +931,7 @@ static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
 			(vaddr & 0xfffffffffffff000UL) |
 			/* 52-53: store / fetch */
 			(((unsigned int) !write_flag) + 1) << 10,
-			/* 62-63: asce id (alway primary == 0) */
+			/* 62-63: asce id (always primary == 0) */
 		.exc_access_id = 0, /* always primary */
 		.op_access_id = 0, /* not MVPG */
 	};
@@ -905,19 +955,19 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
 	int rc;
 
-	if (current->thread.gmap_int_code == PGM_PROTECTION)
+	if ((current->thread.gmap_int_code & PGM_INT_CODE_MASK) == PGM_PROTECTION)
 		/* we can directly forward all protection exceptions */
 		return inject_fault(vcpu, PGM_PROTECTION,
-				    current->thread.gmap_addr, 1);
+				    current->thread.gmap_teid.addr * PAGE_SIZE, 1);
 
 	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				   current->thread.gmap_addr, NULL);
+				   current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
 	if (rc > 0) {
 		rc = inject_fault(vcpu, rc,
-				  current->thread.gmap_addr,
-				  current->thread.gmap_write_flag);
+				  current->thread.gmap_teid.addr * PAGE_SIZE,
+				  kvm_s390_cur_gmap_fault_is_write());
 		if (rc >= 0)
-			vsie_page->fault_addr = current->thread.gmap_addr;
+			vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
 	}
 	return rc;
 }
@@ -968,14 +1018,28 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
 static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
 	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
-	__u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
+	__u32 fac = READ_ONCE(vsie_page->scb_o->fac);
 
+	/*
+	 * Alternate-STFLE-Interpretive-Execution facilities are not supported
+	 * -> format-0 flcb
+	 */
 	if (fac && test_kvm_facility(vcpu->kvm, 7)) {
 		retry_vsie_icpt(vsie_page);
+		/*
+		 * The facility list origin (FLO) is in bits 1 - 28 of the FLD
+		 * so we need to mask here before reading.
+		 */
+		fac = fac & 0x7ffffff8U;
+		/*
+		 * format-0 -> the nested guest's facility list has the guest's size,
+		 * which equals the host's size, since STFLE is interpretively executed
+		 * using a format-0 for the guest, too; hence stfle_size() doublewords.
+		 */
 		if (read_guest_real(vcpu, fac, &vsie_page->fac,
-				    sizeof(vsie_page->fac)))
+				    stfle_size() * sizeof(u64)))
 			return set_validity_icpt(scb_s, 0x1090U);
-		scb_s->fac = (__u32)(__u64) &vsie_page->fac;
+		scb_s->fac = (u32)virt_to_phys(&vsie_page->fac);
 	}
 	return 0;
 }
@@ -1117,11 +1181,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	 * also kick the vSIE.
 	 */
 	vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+	current->thread.gmap_int_code = 0;
 	barrier();
-	if (test_cpu_flag(CIF_FPU))
-		load_fpu_regs();
 	if (!kvm_s390_vcpu_sie_inhibited(vcpu))
-		rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+		rc = sie64a(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
 	barrier();
 	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
 
@@ -1143,7 +1206,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	if (rc > 0)
 		rc = 0; /* we could still have an icpt */
-	else if (rc == -EFAULT)
+	else if (current->thread.gmap_int_code)
 		return handle_fault(vcpu, vsie_page);
 
 	switch (scb_s->icptcode) {
@@ -1194,15 +1257,17 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
 	 * we're holding has been unshadowed. If the gmap is still valid,
 	 * we can safely reuse it.
 	 */
-	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
+	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
+		vcpu->kvm->stat.gmap_shadow_reuse++;
 		return 0;
+	}
 
 	/* release the old shadow - if any, and mark the prefix as unmapped */
 	release_gmap_shadow(vsie_page);
 	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
 	if (IS_ERR(gmap))
 		return PTR_ERR(gmap);
-	gmap->private = vcpu->kvm;
+	vcpu->kvm->stat.gmap_shadow_create++;
 	WRITE_ONCE(vsie_page->gmap, gmap);
 	return 0;
 }
@@ -1264,19 +1329,31 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		if (!rc)
 			rc = map_prefix(vcpu, vsie_page);
 		if (!rc) {
-			gmap_enable(vsie_page->gmap);
 			update_intervention_requests(vsie_page);
 			rc = do_vsie_run(vcpu, vsie_page);
-			gmap_enable(vcpu->arch.gmap);
 		}
 		atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
 
 		if (rc == -EAGAIN)
 			rc = 0;
-		if (rc || scb_s->icptcode || signal_pending(current) ||
+
+		/*
+		 * Exit the loop if the guest needs to process the intercept
+		 */
+		if (rc || scb_s->icptcode)
+			break;
+
+		/*
+		 * Exit the loop if the host needs to process an intercept,
+		 * but rewind the PSW to re-enter SIE once that's completed
+		 * instead of passing a "no action" intercept to the guest.
+		 */
+		if (signal_pending(current) ||
 		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
-		    kvm_s390_vcpu_sie_inhibited(vcpu))
+		    kvm_s390_vcpu_sie_inhibited(vcpu)) {
+			kvm_s390_rewind_psw(vcpu, 4);
 			break;
+		}
 		cond_resched();
 	}
 
@@ -1300,6 +1377,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	return rc;
 }
 
+/* Claim @vsie_page; "true" only if this caller set VSIE_PAGE_IN_USE. */
+static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
+{
+	unsigned long *flags = &vsie_page->flags;
+
+	return !test_bit(VSIE_PAGE_IN_USE, flags) && !test_and_set_bit(VSIE_PAGE_IN_USE, flags);
+}
+
+/* Drop the VSIE_PAGE_IN_USE claim taken by get_vsie_page / try_get_vsie_page. */
+static void put_vsie_page(struct vsie_page *vsie_page)
+{
+	clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+}
+
 /*
  * Get or create a vsie page for a scb address.
  *
@@ -1310,16 +1401,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
 {
 	struct vsie_page *vsie_page;
-	struct page *page;
 	int nr_vcpus;
 
 	rcu_read_lock();
-	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
+	vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
 	rcu_read_unlock();
-	if (page) {
-		if (page_ref_inc_return(page) == 2)
-			return page_to_virt(page);
-		page_ref_dec(page);
+	if (vsie_page) {
+		if (try_get_vsie_page(vsie_page)) {
+			if (vsie_page->scb_gpa == addr)
+				return vsie_page;
+			/*
+			 * We raced with someone reusing + putting this vsie
+			 * page before we grabbed it.
+			 */
+			put_vsie_page(vsie_page);
+		}
 	}
 
 	/*
@@ -1330,36 +1426,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
 
 	mutex_lock(&kvm->arch.vsie.mutex);
 	if (kvm->arch.vsie.page_count < nr_vcpus) {
-		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
-		if (!page) {
+		vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
+		if (!vsie_page) {
 			mutex_unlock(&kvm->arch.vsie.mutex);
 			return ERR_PTR(-ENOMEM);
 		}
-		page_ref_inc(page);
-		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
+		__set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
+		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
 		kvm->arch.vsie.page_count++;
 	} else {
 		/* reuse an existing entry that belongs to nobody */
 		while (true) {
-			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
-			if (page_ref_inc_return(page) == 2)
+			vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
+			if (try_get_vsie_page(vsie_page))
 				break;
-			page_ref_dec(page);
 			kvm->arch.vsie.next++;
 			kvm->arch.vsie.next %= nr_vcpus;
 		}
-		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
+		if (vsie_page->scb_gpa != ULONG_MAX)
+			radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+					  vsie_page->scb_gpa >> 9);
 	}
-	page->index = addr;
-	/* double use of the same address */
-	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
-		page_ref_dec(page);
+	/* Mark it as invalid until it resides in the tree. */
+	vsie_page->scb_gpa = ULONG_MAX;
+
+	/* Double use of the same address or allocation failure. */
+	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
+			      vsie_page)) {
+		put_vsie_page(vsie_page);
 		mutex_unlock(&kvm->arch.vsie.mutex);
 		return NULL;
 	}
+	vsie_page->scb_gpa = addr;
 	mutex_unlock(&kvm->arch.vsie.mutex);
 
-	vsie_page = page_to_virt(page);
 	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
 	release_gmap_shadow(vsie_page);
 	vsie_page->fault_addr = 0;
@@ -1367,14 +1467,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
 	return vsie_page;
 }
 
-/* put a vsie page acquired via get_vsie_page */
-static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
-{
-	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);
-
-	page_ref_dec(page);
-}
-
 int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
 {
 	struct vsie_page *vsie_page;
@@ -1395,8 +1487,10 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
-	    kvm_s390_vcpu_sie_inhibited(vcpu))
+	    kvm_s390_vcpu_sie_inhibited(vcpu)) {
+		kvm_s390_rewind_psw(vcpu, 4);
 		return 0;
+	}
 
 	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
 	if (IS_ERR(vsie_page))
@@ -1423,7 +1517,7 @@ out_unshadow:
 out_unpin_scb:
 	unpin_scb(vcpu, vsie_page, scb_addr);
 out_put:
-	put_vsie_page(vcpu->kvm, vsie_page);
+	put_vsie_page(vsie_page);
 
 	return rc < 0 ? rc : 0;
 }
@@ -1439,18 +1533,18 @@ void kvm_s390_vsie_init(struct kvm *kvm)
 void kvm_s390_vsie_destroy(struct kvm *kvm)
 {
 	struct vsie_page *vsie_page;
-	struct page *page;
 	int i;
 
 	mutex_lock(&kvm->arch.vsie.mutex);
 	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
-		page = kvm->arch.vsie.pages[i];
+		vsie_page = kvm->arch.vsie.pages[i];
 		kvm->arch.vsie.pages[i] = NULL;
-		vsie_page = page_to_virt(page);
 		release_gmap_shadow(vsie_page);
 		/* free the radix tree entry */
-		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
-		__free_page(page);
+		if (vsie_page->scb_gpa != ULONG_MAX)
+			radix_tree_delete(&kvm->arch.vsie.addr_to_page,
+					  vsie_page->scb_gpa >> 9);
+		free_page((unsigned long)vsie_page);
 	}
 	kvm->arch.vsie.page_count = 0;
 	mutex_unlock(&kvm->arch.vsie.mutex);
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 580d2e3265cb..cd35cdbfa871 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,9 @@
 # Makefile for s390-specific library files..
 #
 
-lib-y += delay.o string.o uaccess.o find.o spinlock.o
+obj-y += crypto/
+lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
+lib-y += csum-partial.o
 obj-y += mem.o xor.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
@@ -22,4 +24,7 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
 
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
-obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
+
+obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
+crc32-s390-y := crc32.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-vx.h b/arch/s390/lib/crc32-vx.h
new file mode 100644
index 000000000000..652c96e1a822
--- /dev/null
+++ b/arch/s390/lib/crc32-vx.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CRC32_VX_S390_H
+#define _CRC32_VX_S390_H
+
+#include <linux/types.h>
+
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
+
+#endif /* _CRC32_VX_S390_H */
diff --git a/arch/s390/lib/crc32.c b/arch/s390/lib/crc32.c
new file mode 100644
index 000000000000..3c4b344417c1
--- /dev/null
+++ b/arch/s390/lib/crc32.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CRC-32 implemented with the z/Architecture Vector Extension Facility.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT	"crc32-vx"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
+
+#define VX_MIN_LEN		64
+#define VX_ALIGNMENT		16L
+#define VX_ALIGN_MASK		(VX_ALIGNMENT - 1)
+
+/*
+ * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
+ *
+ * Short buffers (< VX_MIN_LEN + VX_ALIGN_MASK) or !cpu_has_vx() use only
+ * ___crc32_sw. Otherwise ___crc32_sw handles the unaligned head and the
+ * sub-16-byte tail, and ___crc32_vx the 16-byte aligned middle; aligning
+ * improves fetch operations of VECTOR LOAD MULTIPLE instructions.
+ */
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw)		    \
+	u32 ___fname(u32 crc, const u8 *data, size_t datalen)		    \
+	{								    \
+		unsigned long prealign, aligned, remaining;		    \
+		DECLARE_KERNEL_FPU_ONSTACK16(vxstate);			    \
+									    \
+		if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || !cpu_has_vx())  \
+			return ___crc32_sw(crc, data, datalen);		    \
+									    \
+		if ((unsigned long)data & VX_ALIGN_MASK) {		    \
+			prealign = VX_ALIGNMENT -			    \
+				  ((unsigned long)data & VX_ALIGN_MASK);    \
+			datalen -= prealign;				    \
+			crc = ___crc32_sw(crc, data, prealign);		    \
+			data = (void *)((unsigned long)data + prealign);    \
+		}							    \
+									    \
+		aligned = datalen & ~VX_ALIGN_MASK;			    \
+		remaining = datalen & VX_ALIGN_MASK;			    \
+									    \
+		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
+		crc = ___crc32_vx(crc, data, aligned);			    \
+		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
+									    \
+		if (remaining)						    \
+			crc = ___crc32_sw(crc, data + aligned, remaining);  \
+									    \
+		return crc;						    \
+	}								    \
+	EXPORT_SYMBOL(___fname);
+
+DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
+DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
+DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
+
+u32 crc32_optimizations(void)
+{
+	/* No optimization is reported unless cpu_has_vx() holds. */
+	if (!cpu_has_vx())
+		return 0;
+
+	/* With cpu_has_vx(), the LE, BE and CRC32C flags are reported together. */
+	return CRC32_LE_OPTIMIZATION | CRC32_BE_OPTIMIZATION | CRC32C_OPTIMIZATION;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/lib/crc32be-vx.c
index 6b3d1009c392..fed7c9c70d05 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/lib/crc32be-vx.c
@@ -12,20 +12,17 @@
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/vx-insn.h>
+#include <linux/types.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
 
 /* Vector register range containing CRC-32 constants */
-#define CONST_R1R2		%v9
-#define CONST_R3R4		%v10
-#define CONST_R5		%v11
-#define CONST_R6		%v12
-#define CONST_RU_POLY		%v13
-#define CONST_CRC_POLY		%v14
-
-.data
-.align 8
+#define CONST_R1R2		9
+#define CONST_R3R4		10
+#define CONST_R5		11
+#define CONST_R6		12
+#define CONST_RU_POLY		13
+#define CONST_CRC_POLY		14
 
 /*
  * The CRC-32 constant block contains reduction constants to fold and
@@ -48,7 +45,7 @@
  *
  * Note that the constant definitions below are extended in order to compute
  * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
- * The righmost doubleword can be 0 to prevent contribution to the result or
+ * The rightmost doubleword can be 0 to prevent contribution to the result or
  * can be multiplied by 1 to perform an XOR without the need for a separate
  * VECTOR EXCLUSIVE OR instruction.
  *
@@ -58,104 +55,74 @@
  *	P'(x) = 0xEDB88320
  */
 
-.Lconstants_CRC_32_BE:
-	.quad		0x08833794c, 0x0e6228b11	# R1, R2
-	.quad		0x0c5b9cd4c, 0x0e8a45605	# R3, R4
-	.quad		0x0f200aa66, 1 << 32		# R5, x32
-	.quad		0x0490d678d, 1			# R6, 1
-	.quad		0x104d101df, 0			# u
-	.quad		0x104C11DB7, 0			# P(x)
-
-.previous
-
-	GEN_BR_THUNK %r14
-
-.text
-/*
- * The CRC-32 function(s) use these calling conventions:
- *
- * Parameters:
- *
- *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
- *	%r3:	Input buffer pointer, performance might be improved if the
- *		buffer is on a doubleword boundary.
- *	%r4:	Length of the buffer, must be 64 bytes or greater.
+static unsigned long constants_CRC_32_BE[] = {
+	0x08833794c, 0x0e6228b11,	/* R1, R2 */
+	0x0c5b9cd4c, 0x0e8a45605,	/* R3, R4 */
+	0x0f200aa66, 1UL << 32,		/* R5, x32 */
+	0x0490d678d, 1,			/* R6, 1 */
+	0x104d101df, 0,			/* u */
+	0x104C11DB7, 0,			/* P(x) */
+};
+
+/**
+ * crc32_be_vgfm_16 - Compute CRC-32 (BE variant) with vector registers
+ * @crc: Initial CRC value, typically ~0.
+ * @buf: Input buffer pointer, performance might be improved if the
+ *	  buffer is on a doubleword boundary.
+ * @size: Size of the buffer, must be 64 bytes or greater.
  *
  * Register usage:
- *
- *	%r5:	CRC-32 constant pool base pointer.
  *	V0:	Initial CRC value and intermediate constants and results.
  *	V1..V4:	Data for CRC computation.
  *	V5..V8:	Next data chunks that are fetched from the input buffer.
- *
  *	V9..V14: CRC-32 constants.
  */
-ENTRY(crc32_be_vgfm_16)
+u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
+{
 	/* Load CRC-32 constants */
-	larl	%r5,.Lconstants_CRC_32_BE
-	VLM	CONST_R1R2,CONST_CRC_POLY,0,%r5
+	fpu_vlm(CONST_R1R2, CONST_CRC_POLY, &constants_CRC_32_BE);
+	fpu_vzero(0);
 
 	/* Load the initial CRC value into the leftmost word of V0. */
-	VZERO	%v0
-	VLVGF	%v0,%r2,0
+	fpu_vlvgf(0, crc, 0);
 
 	/* Load a 64-byte data chunk and XOR with CRC */
-	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
-	VX	%v1,%v0,%v1		/* V1 ^= CRC */
-	aghi	%r3,64			/* BUF = BUF + 64 */
-	aghi	%r4,-64			/* LEN = LEN - 64 */
-
-	/* Check remaining buffer size and jump to proper folding method */
-	cghi	%r4,64
-	jl	.Lless_than_64bytes
-
-.Lfold_64bytes_loop:
-	/* Load the next 64-byte data chunk into V5 to V8 */
-	VLM	%v5,%v8,0,%r3
+	fpu_vlm(1, 4, buf);
+	fpu_vx(1, 0, 1);
+	buf += 64;
+	size -= 64;
+
+	while (size >= 64) {
+		/* Load the next 64-byte data chunk into V5 to V8 */
+		fpu_vlm(5, 8, buf);
+
+		/*
+		 * Perform a GF(2) multiplication of the doublewords in V1 with
+		 * the reduction constants in V0.  The intermediate result is
+		 * then folded (accumulated) with the next data chunk in V5 and
+		 * stored in V1.  Repeat this step for the register contents
+		 * in V2, V3, and V4 respectively.
+		 */
+		fpu_vgfmag(1, CONST_R1R2, 1, 5);
+		fpu_vgfmag(2, CONST_R1R2, 2, 6);
+		fpu_vgfmag(3, CONST_R1R2, 3, 7);
+		fpu_vgfmag(4, CONST_R1R2, 4, 8);
+		buf += 64;
+		size -= 64;
+	}
 
-	/*
-	 * Perform a GF(2) multiplication of the doublewords in V1 with
-	 * the reduction constants in V0.  The intermediate result is
-	 * then folded (accumulated) with the next data chunk in V5 and
-	 * stored in V1.  Repeat this step for the register contents
-	 * in V2, V3, and V4 respectively.
-	 */
-	VGFMAG	%v1,CONST_R1R2,%v1,%v5
-	VGFMAG	%v2,CONST_R1R2,%v2,%v6
-	VGFMAG	%v3,CONST_R1R2,%v3,%v7
-	VGFMAG	%v4,CONST_R1R2,%v4,%v8
-
-	/* Adjust buffer pointer and length for next loop */
-	aghi	%r3,64			/* BUF = BUF + 64 */
-	aghi	%r4,-64			/* LEN = LEN - 64 */
-
-	cghi	%r4,64
-	jnl	.Lfold_64bytes_loop
-
-.Lless_than_64bytes:
 	/* Fold V1 to V4 into a single 128-bit value in V1 */
-	VGFMAG	%v1,CONST_R3R4,%v1,%v2
-	VGFMAG	%v1,CONST_R3R4,%v1,%v3
-	VGFMAG	%v1,CONST_R3R4,%v1,%v4
-
-	/* Check whether to continue with 64-bit folding */
-	cghi	%r4,16
-	jl	.Lfinal_fold
+	fpu_vgfmag(1, CONST_R3R4, 1, 2);
+	fpu_vgfmag(1, CONST_R3R4, 1, 3);
+	fpu_vgfmag(1, CONST_R3R4, 1, 4);
 
-.Lfold_16bytes_loop:
+	while (size >= 16) {
+		fpu_vl(2, buf);
+		fpu_vgfmag(1, CONST_R3R4, 1, 2);
+		buf += 16;
+		size -= 16;
+	}
 
-	VL	%v2,0,,%r3		/* Load next data chunk */
-	VGFMAG	%v1,CONST_R3R4,%v1,%v2	/* Fold next data chunk */
-
-	/* Adjust buffer pointer and size for folding next data chunk */
-	aghi	%r3,16
-	aghi	%r4,-16
-
-	/* Process remaining data chunks */
-	cghi	%r4,16
-	jnl	.Lfold_16bytes_loop
-
-.Lfinal_fold:
 	/*
 	 * The R5 constant is used to fold a 128-bit value into an 96-bit value
 	 * that is XORed with the next 96-bit input data chunk.  To use a single
@@ -163,7 +130,7 @@ ENTRY(crc32_be_vgfm_16)
 	 * form an intermediate 96-bit value (with appended zeros) which is then
 	 * XORed with the intermediate reduction result.
 	 */
-	VGFMG	%v1,CONST_R5,%v1
+	fpu_vgfmg(1, CONST_R5, 1);
 
 	/*
 	 * Further reduce the remaining 96-bit value to a 64-bit value using a
@@ -172,7 +139,7 @@ ENTRY(crc32_be_vgfm_16)
 	 * doubleword with R6.	The result is a 64-bit value and is subject to
 	 * the Barret reduction.
 	 */
-	VGFMG	%v1,CONST_R6,%v1
+	fpu_vgfmg(1, CONST_R6, 1);
 
 	/*
 	 * The input values to the Barret reduction are the degree-63 polynomial
@@ -193,20 +160,15 @@ ENTRY(crc32_be_vgfm_16)
 	 */
 
 	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
-	VUPLLF	%v2,%v1
-	VGFMG	%v2,CONST_RU_POLY,%v2
+	fpu_vupllf(2, 1);
+	fpu_vgfmg(2, CONST_RU_POLY, 2);
 
 	/*
 	 * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
 	 * V2 and XOR the intermediate result, T2(x),  with the value in V1.
 	 * The final result is in the rightmost word of V2.
 	 */
-	VUPLLF	%v2,%v2
-	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
-
-.Ldone:
-	VLGVF	%r2,%v2,3
-	BR_EX	%r14
-ENDPROC(crc32_be_vgfm_16)
-
-.previous
+	fpu_vupllf(2, 2);
+	fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
+	return fpu_vlgvf(2, 3);
+}
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/lib/crc32le-vx.c
index 71caf0f4ec08..2f629f394df7 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/lib/crc32le-vx.c
@@ -13,20 +13,17 @@
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
  */
 
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include <asm/vx-insn.h>
+#include <linux/types.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
 
 /* Vector register range containing CRC-32 constants */
-#define CONST_PERM_LE2BE	%v9
-#define CONST_R2R1		%v10
-#define CONST_R4R3		%v11
-#define CONST_R5		%v12
-#define CONST_RU_POLY		%v13
-#define CONST_CRC_POLY		%v14
-
-.data
-.align 8
+#define CONST_PERM_LE2BE	9
+#define CONST_R2R1		10
+#define CONST_R4R3		11
+#define CONST_R5		12
+#define CONST_RU_POLY		13
+#define CONST_CRC_POLY		14
 
 /*
  * The CRC-32 constant block contains reduction constants to fold and
@@ -59,62 +56,43 @@
  *	P'(x) = 0x82F63B78
  */
 
-.Lconstants_CRC_32_LE:
-	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
-	.quad		0x1c6e41596, 0x154442bd4		# R2, R1
-	.quad		0x0ccaa009e, 0x1751997d0		# R4, R3
-	.octa		0x163cd6124				# R5
-	.octa		0x1F7011641				# u'
-	.octa		0x1DB710641				# P'(x) << 1
-
-.Lconstants_CRC_32C_LE:
-	.octa		0x0F0E0D0C0B0A09080706050403020100	# BE->LE mask
-	.quad		0x09e4addf8, 0x740eef02			# R2, R1
-	.quad		0x14cd00bd6, 0xf20c0dfe			# R4, R3
-	.octa		0x0dd45aab8				# R5
-	.octa		0x0dea713f1				# u'
-	.octa		0x105ec76f0				# P'(x) << 1
-
-.previous
-
-	GEN_BR_THUNK %r14
-
-.text
-
-/*
- * The CRC-32 functions use these calling conventions:
- *
- * Parameters:
- *
- *	%r2:	Initial CRC value, typically ~0; and final CRC (return) value.
- *	%r3:	Input buffer pointer, performance might be improved if the
- *		buffer is on a doubleword boundary.
- *	%r4:	Length of the buffer, must be 64 bytes or greater.
+static unsigned long constants_CRC_32_LE[] = {
+	0x0f0e0d0c0b0a0908, 0x0706050403020100,	/* BE->LE mask */
+	0x1c6e41596, 0x154442bd4,		/* R2, R1 */
+	0x0ccaa009e, 0x1751997d0,		/* R4, R3 */
+	0x0, 0x163cd6124,			/* R5 */
+	0x0, 0x1f7011641,			/* u' */
+	0x0, 0x1db710641			/* P'(x) << 1 */
+};
+
+static unsigned long constants_CRC_32C_LE[] = {
+	0x0f0e0d0c0b0a0908, 0x0706050403020100,	/* BE->LE mask */
+	0x09e4addf8, 0x740eef02,		/* R2, R1 */
+	0x14cd00bd6, 0xf20c0dfe,		/* R4, R3 */
+	0x0, 0x0dd45aab8,			/* R5 */
+	0x0, 0x0dea713f1,			/* u' */
+	0x0, 0x105ec76f0			/* P'(x) << 1 */
+};
+
+/**
+ * crc32_le_vgfm_generic - Compute CRC-32 (LE variant) with vector registers
+ * @crc: Initial CRC value, typically ~0.
+ * @buf: Input buffer pointer, performance might be improved if the
+ *	 buffer is on a doubleword boundary.
+ * @size: Size of the buffer, must be 64 bytes or greater.
+ * @constants: CRC-32 constant pool base pointer.
  *
  * Register usage:
- *
- *	%r5:	CRC-32 constant pool base pointer.
- *	V0:	Initial CRC value and intermediate constants and results.
- *	V1..V4:	Data for CRC computation.
- *	V5..V8:	Next data chunks that are fetched from the input buffer.
- *	V9:	Constant for BE->LE conversion and shift operations
- *
+ *	V0:	  Initial CRC value and intermediate constants and results.
+ *	V1..V4:	  Data for CRC computation.
+ *	V5..V8:	  Next data chunks that are fetched from the input buffer.
+ *	V9:	  Constant for BE->LE conversion and shift operations
  *	V10..V14: CRC-32 constants.
  */
-
-ENTRY(crc32_le_vgfm_16)
-	larl	%r5,.Lconstants_CRC_32_LE
-	j	crc32_le_vgfm_generic
-ENDPROC(crc32_le_vgfm_16)
-
-ENTRY(crc32c_le_vgfm_16)
-	larl	%r5,.Lconstants_CRC_32C_LE
-	j	crc32_le_vgfm_generic
-ENDPROC(crc32c_le_vgfm_16)
-
-ENTRY(crc32_le_vgfm_generic)
+static u32 crc32_le_vgfm_generic(u32 crc, unsigned char const *buf, size_t size, unsigned long *constants)
+{
 	/* Load CRC-32 constants */
-	VLM	CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
+	fpu_vlm(CONST_PERM_LE2BE, CONST_CRC_POLY, constants);
 
 	/*
 	 * Load the initial CRC value.
@@ -123,90 +101,73 @@ ENTRY(crc32_le_vgfm_generic)
 	 * vector register and is later XORed with the LSB portion
 	 * of the loaded input data.
 	 */
-	VZERO	%v0			/* Clear V0 */
-	VLVGF	%v0,%r2,3		/* Load CRC into rightmost word */
+	fpu_vzero(0);			/* Clear V0 */
+	fpu_vlvgf(0, crc, 3);		/* Load CRC into rightmost word */
 
 	/* Load a 64-byte data chunk and XOR with CRC */
-	VLM	%v1,%v4,0,%r3		/* 64-bytes into V1..V4 */
-	VPERM	%v1,%v1,%v1,CONST_PERM_LE2BE
-	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
-	VPERM	%v3,%v3,%v3,CONST_PERM_LE2BE
-	VPERM	%v4,%v4,%v4,CONST_PERM_LE2BE
+	fpu_vlm(1, 4, buf);
+	fpu_vperm(1, 1, 1, CONST_PERM_LE2BE);
+	fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
+	fpu_vperm(3, 3, 3, CONST_PERM_LE2BE);
+	fpu_vperm(4, 4, 4, CONST_PERM_LE2BE);
+
+	fpu_vx(1, 0, 1);		/* V1 ^= CRC */
+	buf += 64;
+	size -= 64;
+
+	while (size >= 64) {
+		fpu_vlm(5, 8, buf);
+		fpu_vperm(5, 5, 5, CONST_PERM_LE2BE);
+		fpu_vperm(6, 6, 6, CONST_PERM_LE2BE);
+		fpu_vperm(7, 7, 7, CONST_PERM_LE2BE);
+		fpu_vperm(8, 8, 8, CONST_PERM_LE2BE);
+		/*
+		 * Perform a GF(2) multiplication of the doublewords in V1 with
+		 * the R1 and R2 reduction constants in V10 (CONST_R2R1).  The
+		 * intermediate result is then folded (accumulated) with the
+		 * next data chunk in V5 and stored in V1. Repeat this step for
+		 * the register contents in V2, V3, and V4 respectively.
+		 */
+		fpu_vgfmag(1, CONST_R2R1, 1, 5);
+		fpu_vgfmag(2, CONST_R2R1, 2, 6);
+		fpu_vgfmag(3, CONST_R2R1, 3, 7);
+		fpu_vgfmag(4, CONST_R2R1, 4, 8);
+		buf += 64;
+		size -= 64;
+	}
 
-	VX	%v1,%v0,%v1		/* V1 ^= CRC */
-	aghi	%r3,64			/* BUF = BUF + 64 */
-	aghi	%r4,-64			/* LEN = LEN - 64 */
-
-	cghi	%r4,64
-	jl	.Lless_than_64bytes
-
-.Lfold_64bytes_loop:
-	/* Load the next 64-byte data chunk into V5 to V8 */
-	VLM	%v5,%v8,0,%r3
-	VPERM	%v5,%v5,%v5,CONST_PERM_LE2BE
-	VPERM	%v6,%v6,%v6,CONST_PERM_LE2BE
-	VPERM	%v7,%v7,%v7,CONST_PERM_LE2BE
-	VPERM	%v8,%v8,%v8,CONST_PERM_LE2BE
-
-	/*
-	 * Perform a GF(2) multiplication of the doublewords in V1 with
-	 * the R1 and R2 reduction constants in V0.  The intermediate result
-	 * is then folded (accumulated) with the next data chunk in V5 and
-	 * stored in V1. Repeat this step for the register contents
-	 * in V2, V3, and V4 respectively.
-	 */
-	VGFMAG	%v1,CONST_R2R1,%v1,%v5
-	VGFMAG	%v2,CONST_R2R1,%v2,%v6
-	VGFMAG	%v3,CONST_R2R1,%v3,%v7
-	VGFMAG	%v4,CONST_R2R1,%v4,%v8
-
-	aghi	%r3,64			/* BUF = BUF + 64 */
-	aghi	%r4,-64			/* LEN = LEN - 64 */
-
-	cghi	%r4,64
-	jnl	.Lfold_64bytes_loop
-
-.Lless_than_64bytes:
 	/*
 	 * Fold V1 to V4 into a single 128-bit value in V1.  Multiply V1 with R3
 	 * and R4 and accumulating the next 128-bit chunk until a single 128-bit
 	 * value remains.
 	 */
-	VGFMAG	%v1,CONST_R4R3,%v1,%v2
-	VGFMAG	%v1,CONST_R4R3,%v1,%v3
-	VGFMAG	%v1,CONST_R4R3,%v1,%v4
-
-	cghi	%r4,16
-	jl	.Lfinal_fold
-
-.Lfold_16bytes_loop:
-
-	VL	%v2,0,,%r3		/* Load next data chunk */
-	VPERM	%v2,%v2,%v2,CONST_PERM_LE2BE
-	VGFMAG	%v1,CONST_R4R3,%v1,%v2	/* Fold next data chunk */
+	fpu_vgfmag(1, CONST_R4R3, 1, 2);
+	fpu_vgfmag(1, CONST_R4R3, 1, 3);
+	fpu_vgfmag(1, CONST_R4R3, 1, 4);
+
+	while (size >= 16) {
+		fpu_vl(2, buf);
+		fpu_vperm(2, 2, 2, CONST_PERM_LE2BE);
+		fpu_vgfmag(1, CONST_R4R3, 1, 2);
+		buf += 16;
+		size -= 16;
+	}
 
-	aghi	%r3,16
-	aghi	%r4,-16
-
-	cghi	%r4,16
-	jnl	.Lfold_16bytes_loop
-
-.Lfinal_fold:
 	/*
 	 * Set up a vector register for byte shifts.  The shift value must
 	 * be loaded in bits 1-4 in byte element 7 of a vector register.
 	 * Shift by 8 bytes: 0x40
 	 * Shift by 4 bytes: 0x20
 	 */
-	VLEIB	%v9,0x40,7
+	fpu_vleib(9, 0x40, 7);
 
 	/*
 	 * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
 	 * to move R4 into the rightmost doubleword and set the leftmost
 	 * doubleword to 0x1.
 	 */
-	VSRLB	%v0,CONST_R4R3,%v9
-	VLEIG	%v0,1,0
+	fpu_vsrlb(0, CONST_R4R3, 9);
+	fpu_vleig(0, 1, 0);
 
 	/*
 	 * Compute GF(2) product of V1 and V0.	The rightmost doubleword
@@ -214,7 +175,7 @@ ENTRY(crc32_le_vgfm_generic)
 	 * multiplied by 0x1 and is then XORed with rightmost product.
 	 * Implicitly, the intermediate leftmost product becomes padded
 	 */
-	VGFMG	%v1,%v0,%v1
+	fpu_vgfmg(1, 0, 1);
 
 	/*
 	 * Now do the final 32-bit fold by multiplying the rightmost word
@@ -229,10 +190,10 @@ ENTRY(crc32_le_vgfm_generic)
 	 * rightmost doubleword and the leftmost doubleword is zero to ignore
 	 * the leftmost product of V1.
 	 */
-	VLEIB	%v9,0x20,7		  /* Shift by words */
-	VSRLB	%v2,%v1,%v9		  /* Store remaining bits in V2 */
-	VUPLLF	%v1,%v1			  /* Split rightmost doubleword */
-	VGFMAG	%v1,CONST_R5,%v1,%v2	  /* V1 = (V1 * R5) XOR V2 */
+	fpu_vleib(9, 0x20, 7);		  /* Shift by words */
+	fpu_vsrlb(2, 1, 9);		  /* Store remaining bits in V2 */
+	fpu_vupllf(1, 1);		  /* Split rightmost doubleword */
+	fpu_vgfmag(1, CONST_R5, 1, 2);	  /* V1 = (V1 * R5) XOR V2 */
 
 	/*
 	 * Apply a Barret reduction to compute the final 32-bit CRC value.
@@ -254,20 +215,26 @@ ENTRY(crc32_le_vgfm_generic)
 	 */
 
 	/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
-	VUPLLF	%v2,%v1
-	VGFMG	%v2,CONST_RU_POLY,%v2
+	fpu_vupllf(2, 1);
+	fpu_vgfmg(2, CONST_RU_POLY, 2);
 
 	/*
 	 * Compute the GF(2) product of the CRC polynomial with T1(x) in
 	 * V2 and XOR the intermediate result, T2(x), with the value in V1.
 	 * The final result is stored in word element 2 of V2.
 	 */
-	VUPLLF	%v2,%v2
-	VGFMAG	%v2,CONST_CRC_POLY,%v2,%v1
+	fpu_vupllf(2, 2);
+	fpu_vgfmag(2, CONST_CRC_POLY, 2, 1);
+
+	return fpu_vlgvf(2, 2);
+}
 
-.Ldone:
-	VLGVF	%r2,%v2,2
-	BR_EX	%r14
-ENDPROC(crc32_le_vgfm_generic)
+u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
+{
+	return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32_LE[0]);
+}
 
-.previous
+u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size)
+{
+	return crc32_le_vgfm_generic(crc, buf, size, &constants_CRC_32C_LE[0]);
+}
diff --git a/arch/s390/lib/crypto/Kconfig b/arch/s390/lib/crypto/Kconfig
new file mode 100644
index 000000000000..e3f855ef4393
--- /dev/null
+++ b/arch/s390/lib/crypto/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_S390
+	tristate
+	default CRYPTO_LIB_CHACHA
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_SHA256_S390
+	tristate
+	default CRYPTO_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/s390/lib/crypto/Makefile b/arch/s390/lib/crypto/Makefile
new file mode 100644
index 000000000000..5df30f1e7930
--- /dev/null
+++ b/arch/s390/lib/crypto/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
+chacha_s390-y := chacha-glue.o chacha-s390.o
+
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256-s390.o
+sha256-s390-y := sha256.o
diff --git a/arch/s390/lib/crypto/chacha-glue.c b/arch/s390/lib/crypto/chacha-glue.c
new file mode 100644
index 000000000000..f95ba3483bbc
--- /dev/null
+++ b/arch/s390/lib/crypto/chacha-glue.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ChaCha stream cipher (s390 optimized)
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#define KMSG_COMPONENT "chacha_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/chacha.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <asm/fpu.h>
+#include "chacha-s390.h"
+
+void hchacha_block_arch(const struct chacha_state *state,
+			u32 out[HCHACHA_OUT_WORDS], int nrounds)
+{
+	/* No s390-specific HChaCha code yet (TODO: assembly); use generic */
+	hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+		       unsigned int bytes, int nrounds)
+{
+	/*
+	 * The s390 ChaCha20 implementation has 20 rounds hard-coded and
+	 * cannot handle a block of data or less; any larger size is fine.
+	 */
+	if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) {
+		chacha_crypt_generic(state, dst, src, bytes, nrounds);
+	} else {
+		DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
+
+		kernel_fpu_begin(&vxstate, KERNEL_VXR);
+		chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]);
+		kernel_fpu_end(&vxstate, KERNEL_VXR);
+
+		state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) /
+				CHACHA_BLOCK_SIZE;
+	}
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+	return cpu_has_vx();
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/lib/crypto/chacha-s390.S
index 9b033622191c..63f3102678c0 100644
--- a/arch/s390/crypto/chacha-s390.S
+++ b/arch/s390/lib/crypto/chacha-s390.S
@@ -8,32 +8,33 @@
 
 #include <linux/linkage.h>
 #include <asm/nospec-insn.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu-insn.h>
 
 #define SP	%r15
 #define FRAME	(16 * 8 + 4 * 8)
 
-.data
-.align	32
+	.data
+	.balign	32
 
-.Lsigma:
-.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
-.long	1,0,0,0
-.long	2,0,0,0
-.long	3,0,0,0
-.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
+SYM_DATA_START_LOCAL(sigma)
+	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
+	.long	1,0,0,0
+	.long	2,0,0,0
+	.long	3,0,0,0
+	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap
 
-.long	0,1,2,3
-.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
-.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
-.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
-.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
+	.long	0,1,2,3
+	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
+	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
+	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
+	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
+SYM_DATA_END(sigma)
 
-.previous
+	.previous
 
 	GEN_BR_THUNK %r14
 
-.text
+	.text
 
 #############################################################################
 # void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
@@ -78,10 +79,10 @@
 #define XT2		%v29
 #define XT3		%v30
 
-ENTRY(chacha20_vx_4x)
+SYM_FUNC_START(chacha20_vx_4x)
 	stmg	%r6,%r7,6*8(SP)
 
-	larl	%r7,.Lsigma
+	larl	%r7,sigma
 	lhi	%r0,10
 	lhi	%r1,0
 
@@ -403,7 +404,7 @@ ENTRY(chacha20_vx_4x)
 
 	lmg	%r6,%r7,6*8(SP)
 	BR_EX	%r14
-ENDPROC(chacha20_vx_4x)
+SYM_FUNC_END(chacha20_vx_4x)
 
 #undef	OUT
 #undef	INP
@@ -471,7 +472,7 @@ ENDPROC(chacha20_vx_4x)
 #define T2		%v29
 #define T3		%v30
 
-ENTRY(chacha20_vx)
+SYM_FUNC_START(chacha20_vx)
 	clgfi	LEN,256
 	jle	chacha20_vx_4x
 	stmg	%r6,%r7,6*8(SP)
@@ -481,7 +482,7 @@ ENTRY(chacha20_vx)
 	la	SP,0(%r1,SP)
 	stg	%r0,0(SP)		# back-chain
 
-	larl	%r7,.Lsigma
+	larl	%r7,sigma
 	lhi	%r0,10
 
 	VLM	K1,K2,0,KEY,0		# load key
@@ -902,6 +903,6 @@ ENTRY(chacha20_vx)
 	lmg	%r6,%r7,FRAME+6*8(SP)
 	la	SP,FRAME(SP)
 	BR_EX	%r14
-ENDPROC(chacha20_vx)
+SYM_FUNC_END(chacha20_vx)
 
 .previous
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/lib/crypto/chacha-s390.h
index 733744ce30f5..733744ce30f5 100644
--- a/arch/s390/crypto/chacha-s390.h
+++ b/arch/s390/lib/crypto/chacha-s390.h
diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c
new file mode 100644
index 000000000000..7dfe120fafab
--- /dev/null
+++ b/arch/s390/lib/crypto/sha256.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized using the CP Assist for Cryptographic Functions (CPACF)
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/cpacf.h>
+#include <crypto/internal/sha2.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_cpacf_sha256);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+			const u8 *data, size_t nblocks)
+{
+	if (static_branch_likely(&have_cpacf_sha256))
+		cpacf_kimd(CPACF_KIMD_SHA_256, state, data,
+			   nblocks * SHA256_BLOCK_SIZE);
+	else
+		sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+	return static_key_enabled(&have_cpacf_sha256);
+}
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
+
+static int __init sha256_s390_mod_init(void)
+{
+	if (cpu_have_feature(S390_CPU_FEATURE_MSA) &&
+	    cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
+		static_branch_enable(&have_cpacf_sha256);
+	return 0;
+}
+subsys_initcall(sha256_s390_mod_init);
+
+static void __exit sha256_s390_mod_exit(void)
+{
+}
+module_exit(sha256_s390_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 using the CP Assist for Cryptographic Functions (CPACF)");
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c
new file mode 100644
index 000000000000..458abd9bac70
--- /dev/null
+++ b/arch/s390/lib/csum-partial.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <asm/checksum.h>
+#include <asm/fpu.h>
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit). If copy is true, the block is also
+ * copied to dst; otherwise dst is not accessed and may be NULL.
+ * Without the vector facility, cksm() is used on dst after a copy
+ * and on src when nothing was copied.
+ *
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic. This function must be called with even
+ * lengths, except for the last fragment, which may be odd.
+ * It's best to have src and dst aligned on a 64-bit boundary.
+ */
+static __always_inline __wsum csum_copy(void *dst, const void *src, int len, __wsum sum, bool copy)
+{
+	DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
+
+	if (!cpu_has_vx()) {
+		if (copy)
+			memcpy(dst, src, len);
+		return cksm(copy ? dst : src, len, sum);
+	}
+	kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
+	fpu_vlvgf(16, (__force u32)sum, 1);
+	fpu_vzero(17);
+	fpu_vzero(18);
+	fpu_vzero(19);
+	while (len >= 64) {
+		fpu_vlm(20, 23, src);
+		if (copy) {
+			fpu_vstm(20, 23, dst);
+			dst += 64;
+		}
+		fpu_vcksm(16, 20, 16);
+		fpu_vcksm(17, 21, 17);
+		fpu_vcksm(18, 22, 18);
+		fpu_vcksm(19, 23, 19);
+		src += 64;
+		len -= 64;
+	}
+	while (len >= 32) {
+		fpu_vlm(20, 21, src);
+		if (copy) {
+			fpu_vstm(20, 21, dst);
+			dst += 32;
+		}
+		fpu_vcksm(16, 20, 16);
+		fpu_vcksm(17, 21, 17);
+		src += 32;
+		len -= 32;
+	}
+	while (len >= 16) {
+		fpu_vl(20, src);
+		if (copy) {
+			fpu_vst(20, dst);
+			dst += 16;
+		}
+		fpu_vcksm(16, 20, 16);
+		src += 16;
+		len -= 16;
+	}
+	if (len) {
+		fpu_vll(20, len - 1, src);
+		if (copy)
+			fpu_vstl(20, len - 1, dst);
+		fpu_vcksm(16, 20, 16);
+	}
+	fpu_vcksm(18, 19, 18);
+	fpu_vcksm(16, 17, 16);
+	fpu_vcksm(16, 18, 16);
+	sum = (__force __wsum)fpu_vlgvf(16, 1);
+	kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
+	return sum;
+}
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	return csum_copy(NULL, buff, len, sum, false);
+}
+EXPORT_SYMBOL(csum_partial);
+
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+	return csum_copy(dst, src, len, 0, true);
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index f7f5adea8940..be14c58cb989 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -13,13 +13,10 @@
 
 void __delay(unsigned long loops)
 {
-        /*
-         * To end the bloody studid and useless discussion about the
-         * BogoMips number I took the liberty to define the __delay
-         * function in a way that that resulting BogoMips number will
-         * yield the megahertz number of the cpu. The important function
-         * is udelay and that is done using the tod clock. -- martin.
-         */
+	/*
+	 * Loop 'loops' times. Callers must not assume a specific
+	 * amount of time passes before this function returns.
+	 */
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 EXPORT_SYMBOL(__delay);
diff --git a/arch/s390/lib/expoline/expoline.S b/arch/s390/lib/expoline.S
index 92ed8409a7a4..92ed8409a7a4 100644
--- a/arch/s390/lib/expoline/expoline.S
+++ b/arch/s390/lib/expoline.S
diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile
deleted file mode 100644
index 854631d9cb03..000000000000
--- a/arch/s390/lib/expoline/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-y += expoline.o
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index dc0874f2e203..d026debf250c 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -5,8 +5,8 @@
  * Copyright IBM Corp. 2012
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 #include <asm/nospec-insn.h>
 
 	GEN_BR_THUNK %r14
@@ -14,8 +14,7 @@
 /*
  * void *memmove(void *dest, const void *src, size_t n)
  */
-WEAK(memmove)
-ENTRY(__memmove)
+SYM_FUNC_START(__memmove)
 	ltgr	%r4,%r4
 	lgr	%r1,%r2
 	jz	.Lmemmove_exit
@@ -35,8 +34,7 @@ ENTRY(__memmove)
 	la	%r3,256(%r3)
 	brctg	%r0,.Lmemmove_forward_loop
 .Lmemmove_forward_remainder:
-	larl	%r5,.Lmemmove_mvc
-	ex	%r4,0(%r5)
+	exrl	%r4,.Lmemmove_mvc
 .Lmemmove_exit:
 	BR_EX	%r14
 .Lmemmove_reverse:
@@ -48,7 +46,10 @@ ENTRY(__memmove)
 	BR_EX	%r14
 .Lmemmove_mvc:
 	mvc	0(1,%r1),0(%r3)
-ENDPROC(__memmove)
+SYM_FUNC_END(__memmove)
+EXPORT_SYMBOL(__memmove)
+
+SYM_FUNC_ALIAS(memmove, __memmove)
 EXPORT_SYMBOL(memmove)
 
 /*
@@ -66,8 +67,7 @@ EXPORT_SYMBOL(memmove)
  *	return __builtin_memset(s, c, n);
  * }
  */
-WEAK(memset)
-ENTRY(__memset)
+SYM_FUNC_START(__memset)
 	ltgr	%r4,%r4
 	jz	.Lmemset_exit
 	ltgr	%r3,%r3
@@ -82,8 +82,7 @@ ENTRY(__memset)
 	la	%r1,256(%r1)
 	brctg	%r3,.Lmemset_clear_loop
 .Lmemset_clear_remainder:
-	larl	%r3,.Lmemset_xc
-	ex	%r4,0(%r3)
+	exrl	%r4,.Lmemset_xc
 .Lmemset_exit:
 	BR_EX	%r14
 .Lmemset_fill:
@@ -101,8 +100,7 @@ ENTRY(__memset)
 	brctg	%r5,.Lmemset_fill_loop
 .Lmemset_fill_remainder:
 	stc	%r3,0(%r1)
-	larl	%r5,.Lmemset_mvc
-	ex	%r4,0(%r5)
+	exrl	%r4,.Lmemset_mvc
 	BR_EX	%r14
 .Lmemset_fill_exit:
 	stc	%r3,0(%r1)
@@ -111,7 +109,10 @@ ENTRY(__memset)
 	xc	0(1,%r1),0(%r1)
 .Lmemset_mvc:
 	mvc	1(1,%r1),0(%r1)
-ENDPROC(__memset)
+SYM_FUNC_END(__memset)
+EXPORT_SYMBOL(__memset)
+
+SYM_FUNC_ALIAS(memset, __memset)
 EXPORT_SYMBOL(memset)
 
 /*
@@ -119,8 +120,7 @@ EXPORT_SYMBOL(memset)
  *
  * void *memcpy(void *dest, const void *src, size_t n)
  */
-WEAK(memcpy)
-ENTRY(__memcpy)
+SYM_FUNC_START(__memcpy)
 	ltgr	%r4,%r4
 	jz	.Lmemcpy_exit
 	aghi	%r4,-1
@@ -129,8 +129,7 @@ ENTRY(__memcpy)
 	lgr	%r1,%r2
 	jnz	.Lmemcpy_loop
 .Lmemcpy_remainder:
-	larl	%r5,.Lmemcpy_mvc
-	ex	%r4,0(%r5)
+	exrl	%r4,.Lmemcpy_mvc
 .Lmemcpy_exit:
 	BR_EX	%r14
 .Lmemcpy_loop:
@@ -141,7 +140,10 @@ ENTRY(__memcpy)
 	j	.Lmemcpy_remainder
 .Lmemcpy_mvc:
 	mvc	0(1,%r1),0(%r3)
-ENDPROC(__memcpy)
+SYM_FUNC_END(__memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+SYM_FUNC_ALIAS(memcpy, __memcpy)
 EXPORT_SYMBOL(memcpy)
 
 /*
@@ -152,7 +154,7 @@ EXPORT_SYMBOL(memcpy)
  * void *__memset64(uint64_t *s, uint64_t v, size_t count)
  */
 .macro __MEMSET bits,bytes,insn
-ENTRY(__memset\bits)
+SYM_FUNC_START(__memset\bits)
 	ltgr	%r4,%r4
 	jz	.L__memset_exit\bits
 	cghi	%r4,\bytes
@@ -169,8 +171,7 @@ ENTRY(__memset\bits)
 	brctg	%r5,.L__memset_loop\bits
 .L__memset_remainder\bits:
 	\insn	%r3,0(%r1)
-	larl	%r5,.L__memset_mvc\bits
-	ex	%r4,0(%r5)
+	exrl	%r4,.L__memset_mvc\bits
 	BR_EX	%r14
 .L__memset_store\bits:
 	\insn	%r3,0(%r2)
@@ -178,7 +179,7 @@ ENTRY(__memset\bits)
 	BR_EX	%r14
 .L__memset_mvc\bits:
 	mvc	\bytes(1,%r1),0(%r1)
-ENDPROC(__memset\bits)
+SYM_FUNC_END(__memset\bits)
 .endm
 
 __MEMSET 16,2,sth
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 04d4c6cf898e..ad9da4038511 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -10,11 +10,14 @@
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/jiffies.h>
+#include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
+#include <linux/io.h>
 #include <asm/alternative.h>
-#include <asm/io.h>
+#include <asm/machine.h>
+#include <asm/asm.h>
 
 int spin_retry = -1;
 
@@ -36,6 +39,23 @@ static int __init spin_retry_setup(char *str)
 }
 __setup("spin_retry=", spin_retry_setup);
 
+static const struct ctl_table s390_spin_sysctl_table[] = {
+	{
+		.procname	= "spin_retry",
+		.data		= &spin_retry,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+};
+
+static int __init init_s390_spin_sysctls(void)
+{
+	register_sysctl_init("kernel", s390_spin_sysctl_table);
+	return 0;
+}
+arch_initcall(init_s390_spin_sysctls);
+
 struct spin_wait {
 	struct spin_wait *next, *prev;
 	int node_id;
@@ -75,25 +95,44 @@ static inline int arch_load_niai4(int *lock)
 	int owner;
 
 	asm_inline volatile(
-		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
-		"	l	%0,%1\n"
-		: "=d" (owner) : "Q" (*lock) : "memory");
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
+		"	l	%[owner],%[lock]\n"
+		: [owner] "=d" (owner) : [lock] "R" (*lock) : "memory");
 	return owner;
 }
 
-static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
+#ifdef __HAVE_ASM_FLAG_OUTPUTS__
+
+static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
+{
+	int cc;
+
+	asm_inline volatile(
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
+		"	cs	%[old],%[new],%[lock]\n"
+		: [old] "+d" (old), [lock] "+Q" (*lock), "=@cc" (cc)
+		: [new] "d" (new)
+		: "memory");
+	return cc == 0;
+}
+
+#else /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
+static inline int arch_try_cmpxchg_niai8(int *lock, int old, int new)
 {
 	int expected = old;
 
 	asm_inline volatile(
-		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
-		"	cs	%0,%3,%1\n"
-		: "=d" (old), "=Q" (*lock)
-		: "0" (old), "d" (new), "Q" (*lock)
+		ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
+		"	cs	%[old],%[new],%[lock]\n"
+		: [old] "+d" (old), [lock] "+Q" (*lock)
+		: [new] "d" (new)
 		: "cc", "memory");
 	return expected == old;
 }
 
+#endif /* __HAVE_ASM_FLAG_OUTPUTS__ */
+
 static inline struct spin_wait *arch_spin_decode_tail(int lock)
 {
 	int ix, cpu;
@@ -119,16 +158,16 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 	struct spin_wait *node, *next;
 	int lockval, ix, node_id, tail_id, old, new, owner, count;
 
-	ix = S390_lowcore.spinlock_index++;
+	ix = get_lowcore()->spinlock_index++;
 	barrier();
-	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
+	lockval = spinlock_lockval();	/* cpu + 1 */
 	node = this_cpu_ptr(&spin_wait[ix]);
 	node->prev = node->next = NULL;
 	node_id = node->node_id;
 
 	/* Enqueue the node for this CPU in the spinlock wait queue */
+	old = READ_ONCE(lp->lock);
 	while (1) {
-		old = READ_ONCE(lp->lock);
 		if ((old & _Q_LOCK_CPU_MASK) == 0 &&
 		    (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
 			/*
@@ -139,7 +178,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 			 * waiter will get the lock.
 			 */
 			new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
-			if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+			if (arch_try_cmpxchg(&lp->lock, &old, new))
 				/* Got the lock */
 				goto out;
 			/* lock passing in progress */
@@ -147,7 +186,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 		}
 		/* Make the node of this CPU the new tail. */
 		new = node_id | (old & _Q_LOCK_MASK);
-		if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+		if (arch_try_cmpxchg(&lp->lock, &old, new))
 			break;
 	}
 	/* Set the 'next' pointer of the tail node in the queue */
@@ -184,7 +223,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 		if (!owner) {
 			tail_id = old & _Q_TAIL_MASK;
 			new = ((tail_id != node_id) ? tail_id : 0) | lockval;
-			if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+			if (arch_try_cmpxchg(&lp->lock, &old, new))
 				/* Got the lock */
 				break;
 			continue;
@@ -192,7 +231,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 		if (count-- >= 0)
 			continue;
 		count = spin_retry;
-		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+		if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
 			smp_yield_cpu(owner - 1);
 	}
 
@@ -205,14 +244,14 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
 	}
 
  out:
-	S390_lowcore.spinlock_index--;
+	get_lowcore()->spinlock_index--;
 }
 
 static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 {
 	int lockval, old, new, owner, count;
 
-	lockval = SPINLOCK_LOCKVAL;	/* cpu + 1 */
+	lockval = spinlock_lockval();	/* cpu + 1 */
 
 	/* Pass the virtual CPU to the lock holder if it is not running */
 	owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
@@ -226,7 +265,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 		/* Try to get the lock if it is free. */
 		if (!owner) {
 			new = (old & _Q_TAIL_MASK) | lockval;
-			if (arch_cmpxchg_niai8(&lp->lock, old, new)) {
+			if (arch_try_cmpxchg_niai8(&lp->lock, old, new)) {
 				/* Got the lock */
 				return;
 			}
@@ -235,7 +274,7 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 		if (count-- >= 0)
 			continue;
 		count = spin_retry;
-		if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+		if (!machine_is_lpar() || arch_vcpu_is_preempted(owner - 1))
 			smp_yield_cpu(owner - 1);
 	}
 }
@@ -251,14 +290,14 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
 
 int arch_spin_trylock_retry(arch_spinlock_t *lp)
 {
-	int cpu = SPINLOCK_LOCKVAL;
+	int cpu = spinlock_lockval();
 	int owner, count;
 
 	for (count = spin_retry; count > 0; count--) {
 		owner = READ_ONCE(lp->lock);
 		/* Try to get the lock if it is free. */
 		if (!owner) {
-			if (__atomic_cmpxchg_bool(&lp->lock, 0, cpu))
+			if (arch_try_cmpxchg(&lp->lock, &owner, cpu))
 				return 1;
 		}
 	}
@@ -300,7 +339,7 @@ void arch_write_lock_wait(arch_rwlock_t *rw)
 	while (1) {
 		old = READ_ONCE(rw->cnts);
 		if ((old & 0x1ffff) == 0 &&
-		    __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
+		    arch_try_cmpxchg(&rw->cnts, &old, old | 0x10000))
 			/* Got the lock */
 			break;
 		barrier();
@@ -317,7 +356,7 @@ void arch_spin_relax(arch_spinlock_t *lp)
 	cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
 	if (!cpu)
 		return;
-	if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+	if (machine_is_lpar() && !arch_vcpu_is_preempted(cpu - 1))
 		return;
 	smp_yield_cpu(cpu - 1);
 }
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 7d8741818239..099de76e8b1a 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/export.h>
+#include <asm/asm.h>
 
 /*
  * Helper functions to find the end of a string
@@ -77,50 +78,6 @@ EXPORT_SYMBOL(strnlen);
 #endif
 
 /**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- *
- * returns a pointer to @dest
- */
-#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char *dest, const char *src)
-{
-	char *ret = dest;
-
-	asm volatile(
-		"	lghi	0,0\n"
-		"0:	mvst	%[dest],%[src]\n"
-		"	jo	0b\n"
-		: [dest] "+&a" (dest), [src] "+&a" (src)
-		:
-		: "cc", "memory", "0");
-	return ret;
-}
-EXPORT_SYMBOL(strcpy);
-#endif
-
-/**
- * strncpy - Copy a length-limited, %NUL-terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @n: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @n bytes.
- */
-#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char *dest, const char *src, size_t n)
-{
-	size_t len = __strnend(src, n) - src;
-	memset(dest + len, 0, n - len);
-	memcpy(dest, src, len);
-	return dest;
-}
-EXPORT_SYMBOL(strncpy);
-#endif
-
-/**
  * strcat - Append one %NUL-terminated string to another
  * @dest: The string to be appended to
  * @src: The string to append to it
@@ -180,9 +137,6 @@ EXPORT_SYMBOL(strlcat);
  * @n: The maximum numbers of bytes to copy
  *
  * returns a pointer to @dest
- *
- * Note that in contrast to strncpy, strncat ensures the result is
- * terminated.
  */
 #ifdef __HAVE_ARCH_STRNCAT
 char *strncat(char *dest, const char *src, size_t n)
@@ -238,12 +192,11 @@ static inline int clcle(const char *s1, unsigned long l1,
 	asm volatile(
 		"0:	clcle	%[r1],%[r3],0\n"
 		"	jo	0b\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=&d" (cc), [r1] "+&d" (r1.pair), [r3] "+&d" (r3.pair)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [r1] "+d" (r1.pair), [r3] "+d" (r3.pair)
 		:
-		: "cc", "memory");
-	return cc;
+		: CC_CLOBBER_LIST("memory"));
+	return CC_TRANSFORM(cc);
 }
 
 /**
diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c
index 9e62d62812e5..9021298c3e8a 100644
--- a/arch/s390/lib/test_kprobes.c
+++ b/arch/s390/lib/test_kprobes.c
@@ -72,4 +72,5 @@ static struct kunit_suite kprobes_test_suite = {
 
 kunit_test_suites(&kprobes_test_suite);
 
+MODULE_DESCRIPTION("KUnit tests for kprobes");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
index 9894009fc1f2..f96b6a3737e7 100644
--- a/arch/s390/lib/test_modules.c
+++ b/arch/s390/lib/test_modules.c
@@ -29,4 +29,5 @@ static struct kunit_suite modules_test_suite = {
 
 kunit_test_suites(&modules_test_suite);
 
+MODULE_DESCRIPTION("KUnit test that modules with many relocations are loaded properly");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 5a053b393d5c..6e42100875e7 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -47,7 +47,7 @@ static void print_backtrace(char *bt)
 static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 				unsigned long sp)
 {
-	int frame_count, prev_is_func2, seen_func2_func1, seen_kretprobe_trampoline;
+	int frame_count, prev_is_func2, seen_func2_func1, seen_arch_rethook_trampoline;
 	const int max_frames = 128;
 	struct unwind_state state;
 	size_t bt_pos = 0;
@@ -63,7 +63,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 	frame_count = 0;
 	prev_is_func2 = 0;
 	seen_func2_func1 = 0;
-	seen_kretprobe_trampoline = 0;
+	seen_arch_rethook_trampoline = 0;
 	unwind_for_each_frame(&state, task, regs, sp) {
 		unsigned long addr = unwind_get_return_address(&state);
 		char sym[KSYM_SYMBOL_LEN];
@@ -89,8 +89,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 		if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
 			seen_func2_func1 = 1;
 		prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
-		if (str_has_prefix(sym, "__kretprobe_trampoline+0x0/"))
-			seen_kretprobe_trampoline = 1;
+		if (str_has_prefix(sym, "arch_rethook_trampoline+0x0/"))
+			seen_arch_rethook_trampoline = 1;
 	}
 
 	/* Check the results. */
@@ -106,8 +106,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 		kunit_err(current_test, "Maximum number of frames exceeded\n");
 		ret = -EINVAL;
 	}
-	if (seen_kretprobe_trampoline) {
-		kunit_err(current_test, "__kretprobe_trampoline+0x0 in unwinding results\n");
+	if (seen_arch_rethook_trampoline) {
+		kunit_err(current_test, "arch_rethook_trampoline+0x0 in unwinding results\n");
 		ret = -EINVAL;
 	}
 	if (ret || force_bt)
@@ -270,9 +270,9 @@ static void notrace __used test_unwind_ftrace_handler(unsigned long ip,
 						      struct ftrace_ops *fops,
 						      struct ftrace_regs *fregs)
 {
-	struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2];
+	struct unwindme *u = (struct unwindme *)arch_ftrace_regs(fregs)->regs.gprs[2];
 
-	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL,
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &arch_ftrace_regs(fregs)->regs : NULL,
 			     (u->flags & UWM_SP) ? u->sp : 0);
 }
 
@@ -350,15 +350,15 @@ static noinline int unwindme_func3(struct unwindme *u)
 /* This function must appear in the backtrace. */
 static noinline int unwindme_func2(struct unwindme *u)
 {
-	unsigned long flags;
+	unsigned long flags, mflags;
 	int rc;
 
 	if (u->flags & UWM_SWITCH_STACK) {
 		local_irq_save(flags);
-		local_mcck_disable();
-		rc = call_on_stack(1, S390_lowcore.nodat_stack,
+		local_mcck_save(mflags);
+		rc = call_on_stack(1, get_lowcore()->nodat_stack,
 				   int, unwindme_func3, struct unwindme *, u);
-		local_mcck_enable();
+		local_mcck_restore(mflags);
 		local_irq_restore(flags);
 		return rc;
 	} else {
@@ -519,4 +519,5 @@ static struct kunit_suite test_unwind_suite = {
 
 kunit_test_suites(&test_unwind_suite);
 
+MODULE_DESCRIPTION("KUnit test for unwind_for_each_frame");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S
new file mode 100644
index 000000000000..96214f51f49b
--- /dev/null
+++ b/arch/s390/lib/tishift.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+
+	.section .noinstr.text, "ax"
+
+	GEN_BR_THUNK %r14
+
+SYM_FUNC_START(__ashlti3)	/* 128-bit shift left: %r2 = result ptr, %r3 = operand ptr, %r4 = count */
+	lmg	%r0,%r1,0(%r3)		/* %r0 = first, %r1 = second doubleword of the operand */
+	cije	%r4,0,1f		/* count == 0: store the operand unchanged */
+	lhi	%r3,64
+	sr	%r3,%r4			/* %r3 = 64 - count */
+	jnh	0f			/* 64 - count not positive: take the count >= 64 path */
+	srlg	%r3,%r1,0(%r3)		/* bits leaving the top of %r1 ... */
+	sllg	%r0,%r0,0(%r4)
+	sllg	%r1,%r1,0(%r4)
+	ogr	%r0,%r3			/* ... are merged into the bottom of %r0 */
+	j	1f
+0:	sllg	%r0,%r1,-64(%r4)	/* %r0 = %r1 shifted left by (count - 64) */
+	lghi	%r1,0			/* second doubleword is shifted out completely */
+1:	stmg	%r0,%r1,0(%r2)		/* store both result doublewords */
+	BR_EX	%r14
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)	/* 128-bit arithmetic shift right: %r2 = result ptr, %r3 = operand ptr, %r4 = count */
+	lmg	%r0,%r1,0(%r3)		/* %r0 = first, %r1 = second doubleword of the operand */
+	cije	%r4,0,1f		/* count == 0: store the operand unchanged */
+	lhi	%r3,64
+	sr	%r3,%r4			/* %r3 = 64 - count */
+	jnh	0f			/* 64 - count not positive: take the count >= 64 path */
+	sllg	%r3,%r0,0(%r3)		/* bits leaving the bottom of %r0 ... */
+	srlg	%r1,%r1,0(%r4)
+	srag	%r0,%r0,0(%r4)		/* arithmetic shift keeps the sign in %r0 */
+	ogr	%r1,%r3			/* ... are merged into the top of %r1 */
+	j	1f
+0:	srag	%r1,%r0,-64(%r4)	/* %r1 = %r0 shifted right (arithmetic) by (count - 64) */
+	srag	%r0,%r0,63		/* %r0 = all copies of the sign bit */
+1:	stmg	%r0,%r1,0(%r2)		/* store both result doublewords */
+	BR_EX	%r14
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)	/* 128-bit logical shift right: %r2 = result ptr, %r3 = operand ptr, %r4 = count */
+	lmg	%r0,%r1,0(%r3)		/* %r0 = first, %r1 = second doubleword of the operand */
+	cije	%r4,0,1f		/* count == 0: store the operand unchanged */
+	lhi	%r3,64
+	sr	%r3,%r4			/* %r3 = 64 - count */
+	jnh	0f			/* 64 - count not positive: take the count >= 64 path */
+	sllg	%r3,%r0,0(%r3)		/* bits leaving the bottom of %r0 ... */
+	srlg	%r1,%r1,0(%r4)
+	srlg	%r0,%r0,0(%r4)
+	ogr	%r1,%r3			/* ... are merged into the top of %r1 */
+	j	1f
+0:	srlg	%r1,%r0,-64(%r4)	/* %r1 = %r0 shifted right (logical) by (count - 64) */
+	lghi	%r0,0			/* first doubleword is shifted out completely */
+1:	stmg	%r0,%r1,0(%r2)		/* store both result doublewords */
+	BR_EX	%r14
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d7b3b193d108..fa7d98fa1320 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -12,67 +12,79 @@
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <asm/asm-extable.h>
+#include <asm/ctlreg.h>
 
 #ifdef CONFIG_DEBUG_ENTRY
 void debug_user_asce(int exit)
 {
-	unsigned long cr1, cr7;
+	struct lowcore *lc = get_lowcore();
+	struct ctlreg cr1, cr7;
 
-	__ctl_store(cr1, 1, 1);
-	__ctl_store(cr7, 7, 7);
-	if (cr1 == S390_lowcore.kernel_asce && cr7 == S390_lowcore.user_asce)
+	local_ctl_store(1, &cr1);
+	local_ctl_store(7, &cr7);
+	if (cr1.val == lc->user_asce.val && cr7.val == lc->user_asce.val)
 		return;
 	panic("incorrect ASCE on kernel %s\n"
 	      "cr1:    %016lx cr7:  %016lx\n"
-	      "kernel: %016llx user: %016llx\n",
-	      exit ? "exit" : "entry", cr1, cr7,
-	      S390_lowcore.kernel_asce, S390_lowcore.user_asce);
-
+	      "kernel: %016lx user: %016lx\n",
+	      exit ? "exit" : "entry", cr1.val, cr7.val,
+	      lc->kernel_asce.val, lc->user_asce.val);
 }
 #endif /*CONFIG_DEBUG_ENTRY */
 
-static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
-					    unsigned long size, unsigned long key)
+union oac {
+	unsigned int val;
+	struct {
+		struct {
+			unsigned short key : 4;
+			unsigned short	   : 4;
+			unsigned short as  : 2;
+			unsigned short	   : 4;
+			unsigned short k   : 1;
+			unsigned short a   : 1;
+		} oac1;
+		struct {
+			unsigned short key : 4;
+			unsigned short	   : 4;
+			unsigned short as  : 2;
+			unsigned short	   : 4;
+			unsigned short k   : 1;
+			unsigned short a   : 1;
+		} oac2;
+	};
+};
+
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key)
 {
-	unsigned long tmp1, tmp2;
+	unsigned long osize;
 	union oac spec = {
 		.oac2.key = key,
 		.oac2.as = PSW_BITS_AS_SECONDARY,
 		.oac2.k = 1,
 		.oac2.a = 1,
 	};
+	int cc;
 
-	tmp1 = -4096UL;
-	asm volatile(
-		"   lr	  0,%[spec]\n"
-		"0: mvcos 0(%2),0(%1),%0\n"
-		"6: jz    4f\n"
-		"1: algr  %0,%3\n"
-		"   slgr  %1,%3\n"
-		"   slgr  %2,%3\n"
-		"   j     0b\n"
-		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
-		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
-		"   slgr  %4,%1\n"
-		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
-		"   jnh   5f\n"
-		"3: mvcos 0(%2),0(%1),%4\n"
-		"7: slgr  %0,%4\n"
-		"   j     5f\n"
-		"4: slgr  %0,%0\n"
-		"5:\n"
-		EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
-		: "+a" (size), "+a" (from), "+a" (to), "+a" (tmp1), "=a" (tmp2)
-		: [spec] "d" (spec.val)
-		: "cc", "memory", "0");
-	return size;
-}
-
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	return raw_copy_from_user_key(to, from, n, 0);
+	while (1) {
+		osize = size;
+		asm_inline volatile(
+			"	lr	%%r0,%[spec]\n"
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_FROM(0b, 0b)
+			EX_TABLE_UA_MVCOS_FROM(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char *)to)
+			: [spec] "d" (spec.val), [from] "Q" (*(const char __user *)from)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (CC_TRANSFORM(cc) == 0)
+			return osize - size;
+		size -= 4096;
+		to += 4096;
+		from += 4096;
+	}
 }
-EXPORT_SYMBOL(raw_copy_from_user);
 
 unsigned long _copy_from_user_key(void *to, const void __user *from,
 				  unsigned long n, unsigned long key)
@@ -81,8 +93,9 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
 
 	might_fault();
 	if (!should_fail_usercopy()) {
-		instrument_copy_from_user(to, from, n);
+		instrument_copy_from_user_before(to, from, n);
 		res = raw_copy_from_user_key(to, from, n, key);
+		instrument_copy_from_user_after(to, from, n, res);
 	}
 	if (unlikely(res))
 		memset(to + (n - res), 0, res);
@@ -90,48 +103,37 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
 }
 EXPORT_SYMBOL(_copy_from_user_key);
 
-static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
-					  unsigned long size, unsigned long key)
+static uaccess_kmsan_or_inline __must_check unsigned long
+raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key)
 {
-	unsigned long tmp1, tmp2;
+	unsigned long osize;
 	union oac spec = {
 		.oac1.key = key,
 		.oac1.as = PSW_BITS_AS_SECONDARY,
 		.oac1.k = 1,
 		.oac1.a = 1,
 	};
+	int cc;
 
-	tmp1 = -4096UL;
-	asm volatile(
-		"   lr	  0,%[spec]\n"
-		"0: mvcos 0(%1),0(%2),%0\n"
-		"6: jz    4f\n"
-		"1: algr  %0,%3\n"
-		"   slgr  %1,%3\n"
-		"   slgr  %2,%3\n"
-		"   j     0b\n"
-		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
-		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
-		"   slgr  %4,%1\n"
-		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
-		"   jnh   5f\n"
-		"3: mvcos 0(%1),0(%2),%4\n"
-		"7: slgr  %0,%4\n"
-		"   j     5f\n"
-		"4: slgr  %0,%0\n"
-		"5:\n"
-		EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
-		: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
-		: [spec] "d" (spec.val)
-		: "cc", "memory", "0");
-	return size;
-}
-
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	return raw_copy_to_user_key(to, from, n, 0);
+	while (1) {
+		osize = size;
+		asm_inline volatile(
+			"	lr	%%r0,%[spec]\n"
+			"0:	mvcos	%[to],%[from],%[size]\n"
+			"1:	nopr	%%r7\n"
+			CC_IPM(cc)
+			EX_TABLE_UA_MVCOS_TO(0b, 0b)
+			EX_TABLE_UA_MVCOS_TO(1b, 0b)
+			: CC_OUT(cc, cc), [size] "+d" (size), [to] "=Q" (*(char __user *)to)
+			: [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
+			: CC_CLOBBER_LIST("memory", "0"));
+		if (CC_TRANSFORM(cc) == 0)
+			return osize - size;
+		size -= 4096;
+		to += 4096;
+		from += 4096;
+	}
 }
-EXPORT_SYMBOL(raw_copy_to_user);
 
 unsigned long _copy_to_user_key(void __user *to, const void *from,
 				unsigned long n, unsigned long key)
@@ -143,37 +145,3 @@ unsigned long _copy_to_user_key(void __user *to, const void *from,
 	return raw_copy_to_user_key(to, from, n, key);
 }
 EXPORT_SYMBOL(_copy_to_user_key);
-
-unsigned long __clear_user(void __user *to, unsigned long size)
-{
-	unsigned long tmp1, tmp2;
-	union oac spec = {
-		.oac1.as = PSW_BITS_AS_SECONDARY,
-		.oac1.a = 1,
-	};
-
-	tmp1 = -4096UL;
-	asm volatile(
-		"   lr	  0,%[spec]\n"
-		"0: mvcos 0(%1),0(%4),%0\n"
-		"   jz	  4f\n"
-		"1: algr  %0,%2\n"
-		"   slgr  %1,%2\n"
-		"   j	  0b\n"
-		"2: la	  %3,4095(%1)\n"/* %4 = to + 4095 */
-		"   nr	  %3,%2\n"	/* %4 = (to + 4095) & -4096 */
-		"   slgr  %3,%1\n"
-		"   clgr  %0,%3\n"	/* copy crosses next page boundary? */
-		"   jnh	  5f\n"
-		"3: mvcos 0(%1),0(%4),%3\n"
-		"   slgr  %0,%3\n"
-		"   j	  5f\n"
-		"4: slgr  %0,%0\n"
-		"5:\n"
-		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
-		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
-		: "a" (empty_zero_page), [spec] "d" (spec.val)
-		: "cc", "memory", "0");
-	return size;
-}
-EXPORT_SYMBOL(__clear_user);
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index fb924a8041dc..ce7bcf7c0032 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p2)
 {
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
 		"	jm	3f\n"
 		"	srlg	0,%0,8\n"
@@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%1,256(%1)\n"
 		"	la	%2,256(%2)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
+		"1:	exrl	%0,2f\n"
 		"	j	3f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
 		"3:\n"
 		: : "d" (bytes), "a" (p1), "a" (p2)
-		: "0", "1", "cc", "memory");
+		: "0", "cc", "memory");
 }
 
 static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
@@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p3)
 {
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
-		"	jm	3f\n"
+		"	jm	4f\n"
 		"	srlg	0,%0,8\n"
 		"	ltgr	0,0\n"
 		"	jz	1f\n"
@@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%2,256(%2)\n"
 		"	la	%3,256(%3)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
-		"	ex	%0,6(1)\n"
-		"	j	3f\n"
+		"1:	exrl	%0,2f\n"
+		"	exrl	%0,3f\n"
+		"	j	4f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
-		"	xc	0(1,%1),0(%3)\n"
-		"3:\n"
+		"3:	xc	0(1,%1),0(%3)\n"
+		"4:\n"
 		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
-		: : "0", "1", "cc", "memory");
+		: : "0", "cc", "memory");
 }
 
 static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
@@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
 		     const unsigned long * __restrict p4)
 {
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
-		"	jm	3f\n"
+		"	jm	5f\n"
 		"	srlg	0,%0,8\n"
 		"	ltgr	0,0\n"
 		"	jz	1f\n"
@@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%3,256(%3)\n"
 		"	la	%4,256(%4)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
-		"	ex	%0,6(1)\n"
-		"	ex	%0,12(1)\n"
-		"	j	3f\n"
+		"1:	exrl	%0,2f\n"
+		"	exrl	%0,3f\n"
+		"	exrl	%0,4f\n"
+		"	j	5f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
-		"	xc	0(1,%1),0(%3)\n"
-		"	xc	0(1,%1),0(%4)\n"
-		"3:\n"
+		"3:	xc	0(1,%1),0(%3)\n"
+		"4:	xc	0(1,%1),0(%4)\n"
+		"5:\n"
 		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
-		: : "0", "1", "cc", "memory");
+		: : "0", "cc", "memory");
 }
 
 static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
@@ -101,7 +98,6 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
 	asm volatile(
-		"	larl	1,2f\n"
 		"	aghi	%0,-1\n"
-		"	jm	3f\n"
+		"	jm	6f\n"
 		"	srlg	0,%0,8\n"
 		"	ltgr	0,0\n"
 		"	jz	1f\n"
@@ -115,19 +111,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
 		"	la	%4,256(%4)\n"
 		"	la	%5,256(%5)\n"
 		"	brctg	0,0b\n"
-		"1:	ex	%0,0(1)\n"
-		"	ex	%0,6(1)\n"
-		"	ex	%0,12(1)\n"
-		"	ex	%0,18(1)\n"
-		"	j	3f\n"
+		"1:	exrl	%0,2f\n"
+		"	exrl	%0,3f\n"
+		"	exrl	%0,4f\n"
+		"	exrl	%0,5f\n"
+		"	j	6f\n"
 		"2:	xc	0(1,%1),0(%2)\n"
-		"	xc	0(1,%1),0(%3)\n"
-		"	xc	0(1,%1),0(%4)\n"
-		"	xc	0(1,%1),0(%5)\n"
-		"3:\n"
+		"3:	xc	0(1,%1),0(%3)\n"
+		"4:	xc	0(1,%1),0(%4)\n"
+		"5:	xc	0(1,%1),0(%5)\n"
+		"6:\n"
 		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
 		  "+a" (p5)
-		: : "0", "1", "cc", "memory");
+		: : "0", "cc", "memory");
 }
 
 struct xor_block_template xor_block_xc = {
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 57e4f3a24829..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -7,9 +7,10 @@ obj-y		:= init.o fault.o extmem.o mmap.o vmem.o maccess.o
 obj-y		+= page-states.o pageattr.o pgtable.o pgalloc.o extable.o
 
 obj-$(CONFIG_CMM)		+= cmm.o
+obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
+obj-$(CONFIG_PTDUMP)		+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
+obj-$(CONFIG_PFAULT)		+= pfault.o
 
-KASAN_SANITIZE_kasan_init.o	:= n
-obj-$(CONFIG_KASAN)		+= kasan_init.o
+obj-$(subst m,y,$(CONFIG_KVM))	+= gmap_helpers.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 9141ed4c52e9..e2a6eb92420f 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -90,16 +90,17 @@ static long cmm_alloc_pages(long nr, long *counter,
 			} else
 				free_page((unsigned long) npa);
 		}
-		diag10_range(virt_to_pfn(addr), 1);
+		diag10_range(virt_to_pfn((void *)addr), 1);
 		pa->pages[pa->index++] = addr;
 		(*counter)++;
 		spin_unlock(&cmm_lock);
 		nr--;
+		cond_resched();
 	}
 	return nr;
 }
 
-static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+static long __cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
 {
 	struct cmm_page_array *pa;
 	unsigned long addr;
@@ -123,6 +124,21 @@ static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
 	return nr;
 }
 
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+{
+	long chunk, left = 0;
+
+	/* Work in chunks of at most 256 pages and yield the CPU in between. */
+	while (nr && !left) {
+		chunk = min(256L, nr);
+		nr -= chunk;
+		left = __cmm_free_pages(chunk, counter, list);
+		if (!left)
+			cond_resched();
+	}
+	return nr + left;
+}
+
 static int cmm_oom_notify(struct notifier_block *self,
 			  unsigned long dummy, void *parm)
 {
@@ -185,10 +201,10 @@ static void cmm_set_timer(void)
 {
 	if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
 		if (timer_pending(&cmm_timer))
-			del_timer(&cmm_timer);
+			timer_delete(&cmm_timer);
 		return;
 	}
-	mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
+	mod_timer(&cmm_timer, jiffies + secs_to_jiffies(cmm_timeout_seconds));
 }
 
 static void cmm_timer_fn(struct timer_list *unused)
@@ -243,7 +259,7 @@ static int cmm_skip_blanks(char *cp, char **endp)
 	return str != cp;
 }
 
-static int cmm_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_pages_handler(const struct ctl_table *ctl, int write,
 			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long nr = cmm_get_pages();
@@ -262,7 +278,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write,
 	return 0;
 }
 
-static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_timed_pages_handler(const struct ctl_table *ctl, int write,
 				   void *buffer, size_t *lenp,
 				   loff_t *ppos)
 {
@@ -282,7 +298,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
 	return 0;
 }
 
-static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+static int cmm_timeout_handler(const struct ctl_table *ctl, int write,
 			       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char buf[64], *p;
@@ -316,7 +332,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
 	return 0;
 }
 
-static struct ctl_table cmm_table[] = {
+static const struct ctl_table cmm_table[] = {
 	{
 		.procname	= "cmm_pages",
 		.mode		= 0644,
@@ -332,17 +348,6 @@ static struct ctl_table cmm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= cmm_timeout_handler,
 	},
-	{ }
-};
-
-static struct ctl_table cmm_dir_table[] = {
-	{
-		.procname	= "vm",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= cmm_table,
-	},
-	{ }
 };
 
 #ifdef CONFIG_CMM_IUCV
@@ -389,7 +394,7 @@ static int __init cmm_init(void)
 {
 	int rc = -ENOMEM;
 
-	cmm_sysctl_header = register_sysctl_table(cmm_dir_table);
+	cmm_sysctl_header = register_sysctl("vm", cmm_table);
 	if (!cmm_sysctl_header)
 		goto out_sysctl;
 #ifdef CONFIG_CMM_IUCV
@@ -419,7 +424,7 @@ out_smsg:
 #endif
 	unregister_sysctl_table(cmm_sysctl_header);
 out_sysctl:
-	del_timer_sync(&cmm_timer);
+	timer_delete_sync(&cmm_timer);
 	return rc;
 }
 module_init(cmm_init);
@@ -432,10 +437,11 @@ static void __exit cmm_exit(void)
 #endif
 	unregister_oom_notifier(&cmm_oom_nb);
 	kthread_stop(cmm_thread_ptr);
-	del_timer_sync(&cmm_timer);
+	timer_delete_sync(&cmm_timer);
 	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
 	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
 }
 module_exit(cmm_exit);
 
+MODULE_DESCRIPTION("Cooperative memory management interface");
 MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9f9af5298dd6..ac604b176660 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -1,69 +1,31 @@
 // SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cpufeature.h>
 #include <linux/set_memory.h>
 #include <linux/ptdump.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/sort.h>
 #include <linux/mm.h>
 #include <linux/kfence.h>
 #include <linux/kasan.h>
-#include <asm/ptdump.h>
 #include <asm/kasan.h>
+#include <asm/abs_lowcore.h>
 #include <asm/nospec-branch.h>
 #include <asm/sections.h>
+#include <asm/maccess.h>
 
 static unsigned long max_addr;
 
 struct addr_marker {
+	int is_start;
 	unsigned long start_address;
+	unsigned long size;
 	const char *name;
 };
 
-enum address_markers_idx {
-	IDENTITY_BEFORE_NR = 0,
-	IDENTITY_BEFORE_END_NR,
-	KERNEL_START_NR,
-	KERNEL_END_NR,
-#ifdef CONFIG_KFENCE
-	KFENCE_START_NR,
-	KFENCE_END_NR,
-#endif
-	IDENTITY_AFTER_NR,
-	IDENTITY_AFTER_END_NR,
-#ifdef CONFIG_KASAN
-	KASAN_SHADOW_START_NR,
-	KASAN_SHADOW_END_NR,
-#endif
-	VMEMMAP_NR,
-	VMEMMAP_END_NR,
-	VMALLOC_NR,
-	VMALLOC_END_NR,
-	MODULES_NR,
-	MODULES_END_NR,
-};
-
-static struct addr_marker address_markers[] = {
-	[IDENTITY_BEFORE_NR]	= {0, "Identity Mapping Start"},
-	[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
-	[KERNEL_START_NR]	= {(unsigned long)_stext, "Kernel Image Start"},
-	[KERNEL_END_NR]		= {(unsigned long)_end, "Kernel Image End"},
-#ifdef CONFIG_KFENCE
-	[KFENCE_START_NR]	= {0, "KFence Pool Start"},
-	[KFENCE_END_NR]		= {0, "KFence Pool End"},
-#endif
-	[IDENTITY_AFTER_NR]	= {(unsigned long)_end, "Identity Mapping Start"},
-	[IDENTITY_AFTER_END_NR]	= {0, "Identity Mapping End"},
-#ifdef CONFIG_KASAN
-	[KASAN_SHADOW_START_NR]	= {KASAN_SHADOW_START, "Kasan Shadow Start"},
-	[KASAN_SHADOW_END_NR]	= {KASAN_SHADOW_END, "Kasan Shadow End"},
-#endif
-	[VMEMMAP_NR]		= {0, "vmemmap Area Start"},
-	[VMEMMAP_END_NR]	= {0, "vmemmap Area End"},
-	[VMALLOC_NR]		= {0, "vmalloc Area Start"},
-	[VMALLOC_END_NR]	= {0, "vmalloc Area End"},
-	[MODULES_NR]		= {0, "Modules Area Start"},
-	[MODULES_END_NR]	= {0, "Modules Area End"},
-	{ -1, NULL }
-};
+static struct addr_marker *markers;
+static unsigned int markers_cnt;
 
 struct pg_state {
 	struct ptdump_state ptdump;
@@ -108,7 +70,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
 
 static void note_prot_wx(struct pg_state *st, unsigned long addr)
 {
-#ifdef CONFIG_DEBUG_WX
 	if (!st->check_wx)
 		return;
 	if (st->current_prot & _PAGE_INVALID)
@@ -123,12 +84,26 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 	 * in which case we have two lpswe instructions in lowcore that need
 	 * to be executable.
 	 */
-	if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
+	if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !cpu_has_bear()))
 		return;
-	WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+	WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
+		  "s390/mm: Found insecure W+X mapping at address %pS\n",
 		  (void *)st->start_address);
 	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
-#endif /* CONFIG_DEBUG_WX */
+}
+
+static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level)
+{
+	struct seq_file *m = st->seq;
+
+	while (addr >= st->marker[1].start_address) {
+		st->marker++;
+		pt_dump_seq_printf(m, "---[ %s %s ]---\n", st->marker->name,
+				   st->marker->is_start ? "Start" : "End");
+	}
+	st->start_address = addr;
+	st->current_prot = prot;
+	st->level = level;
 }
 
 static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
@@ -153,10 +128,8 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
 	if (level == -1)
 		addr = max_addr;
 	if (st->level == -1) {
-		pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
-		st->start_address = addr;
-		st->current_prot = prot;
-		st->level = level;
+		pt_dump_seq_puts(m, "---[ Kernel Virtual Address Space ]---\n");
+		note_page_update_state(st, addr, prot, level);
 	} else if (prot != st->current_prot || level != st->level ||
 		   addr >= st->marker[1].start_address) {
 		note_prot_wx(st, addr);
@@ -170,22 +143,52 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
 		}
 		pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
 		print_prot(m, st->current_prot, st->level);
-		while (addr >= st->marker[1].start_address) {
-			st->marker++;
-			pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
-		}
-		st->start_address = addr;
-		st->current_prot = prot;
-		st->level = level;
+		note_page_update_state(st, addr, prot, level);
 	}
 }
 
-#ifdef CONFIG_DEBUG_WX
-void ptdump_check_wx(void)
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
+{
+	note_page(pt_st, addr, 4, pte_val(pte));
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
+{
+	note_page(pt_st, addr, 3, pmd_val(pmd));
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
+{
+	note_page(pt_st, addr, 2, pud_val(pud));
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
+{
+	note_page(pt_st, addr, 1, p4d_val(p4d));
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
+{
+	note_page(pt_st, addr, 0, pgd_val(pgd));
+}
+
+static void note_page_flush(struct ptdump_state *pt_st)
+{
+	pte_t pte_zero = {0};
+
+	note_page(pt_st, 0, -1, pte_val(pte_zero));
+}
+
+bool ptdump_check_wx(void)
 {
 	struct pg_state st = {
 		.ptdump = {
-			.note_page = note_page,
+			.note_page_pte = note_page_pte,
+			.note_page_pmd = note_page_pmd,
+			.note_page_pud = note_page_pud,
+			.note_page_p4d = note_page_p4d,
+			.note_page_pgd = note_page_pgd,
+			.note_page_flush = note_page_flush,
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
@@ -203,24 +206,33 @@ void ptdump_check_wx(void)
 		},
 	};
 
-	if (!MACHINE_HAS_NX)
-		return;
+	if (!cpu_has_nx())
+		return true;
 	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
-	if (st.wx_pages)
+	if (st.wx_pages) {
 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
-	else
+
+		return false;
+	} else {
 		pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
-			(nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
+			(nospec_uses_trampoline() || !cpu_has_bear()) ?
 			"unexpected " : "");
+
+		return true;
+	}
 }
-#endif /* CONFIG_DEBUG_WX */
 
 #ifdef CONFIG_PTDUMP_DEBUGFS
 static int ptdump_show(struct seq_file *m, void *v)
 {
 	struct pg_state st = {
 		.ptdump = {
-			.note_page = note_page,
+			.note_page_pte = note_page_pte,
+			.note_page_pmd = note_page_pmd,
+			.note_page_pud = note_page_pud,
+			.note_page_p4d = note_page_p4d,
+			.note_page_pgd = note_page_pgd,
+			.note_page_flush = note_page_flush,
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
@@ -232,7 +244,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 		.check_wx = false,
 		.wx_pages = 0,
 		.start_address = 0,
-		.marker = address_markers,
+		.marker = markers,
 	};
 
 	get_online_mems();
@@ -245,22 +257,66 @@ static int ptdump_show(struct seq_file *m, void *v)
 DEFINE_SHOW_ATTRIBUTE(ptdump);
 #endif /* CONFIG_PTDUMP_DEBUGFS */
 
-/*
- * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
- * insertion sort to preserve the original order of markers with the same
- * start address.
- */
-static void sort_address_markers(void)
+static int ptdump_cmp(const void *a, const void *b)
 {
-	struct addr_marker tmp;
-	int i, j;
-
-	for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
-		tmp = address_markers[i];
-		for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
-			address_markers[j + 1] = address_markers[j];
-		address_markers[j + 1] = tmp;
+	const struct addr_marker *ama = a;
+	const struct addr_marker *amb = b;
+
+	if (ama->start_address > amb->start_address)
+		return 1;
+	if (ama->start_address < amb->start_address)
+		return -1;
+	/*
+	 * If the start addresses of two markers are identical sort markers in an
+	 * order that considers areas contained within other areas correctly.
+	 */
+	if (ama->is_start && amb->is_start) {
+		if (ama->size > amb->size)
+			return -1;
+		if (ama->size < amb->size)
+			return 1;
+		return 0;
 	}
+	if (!ama->is_start && !amb->is_start) {
+		if (ama->size > amb->size)
+			return 1;
+		if (ama->size < amb->size)
+			return -1;
+		return 0;
+	}
+	if (ama->is_start)
+		return 1;
+	if (amb->is_start)
+		return -1;
+	return 0;
+}
+
+static int add_marker(unsigned long start, unsigned long end, const char *name)
+{
+	size_t oldsize, newsize;
+
+	oldsize = markers_cnt * sizeof(*markers);
+	newsize = oldsize + 2 * sizeof(*markers);
+	if (!oldsize)
+		markers = kvmalloc(newsize, GFP_KERNEL);
+	else
+		markers = kvrealloc(markers, newsize, GFP_KERNEL);
+	if (!markers)
+		goto error;
+	markers[markers_cnt].is_start = 1;
+	markers[markers_cnt].start_address = start;
+	markers[markers_cnt].size = end - start;
+	markers[markers_cnt].name = name;
+	markers_cnt++;
+	markers[markers_cnt].is_start = 0;
+	markers[markers_cnt].start_address = end;
+	markers[markers_cnt].size = end - start;
+	markers[markers_cnt].name = name;
+	markers_cnt++;
+	return 0;
+error:
+	markers_cnt = 0;
+	return -ENOMEM;
 }
 
 static int pt_dump_init(void)
@@ -268,28 +324,48 @@ static int pt_dump_init(void)
 #ifdef CONFIG_KFENCE
 	unsigned long kfence_start = (unsigned long)__kfence_pool;
 #endif
+	unsigned long lowcore = (unsigned long)get_lowcore();
+	int rc;
+
 	/*
 	 * Figure out the maximum virtual address being accessible with the
 	 * kernel ASCE. We need this to keep the page table walker functions
 	 * from accessing non-existent entries.
 	 */
-	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+	max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2;
 	max_addr = 1UL << (max_addr * 11 + 31);
-	address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
-	address_markers[MODULES_NR].start_address = MODULES_VADDR;
-	address_markers[MODULES_END_NR].start_address = MODULES_END;
-	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
-	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
-	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
-	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+	/* start + end markers - must be added first */
+	rc = add_marker(0, -1UL, NULL);
+	rc |= add_marker((unsigned long)_stext, (unsigned long)_end, "Kernel Image");
+	rc |= add_marker(lowcore, lowcore + sizeof(struct lowcore), "Lowcore");
+	rc |= add_marker(__identity_base, __identity_base + ident_map_size, "Identity Mapping");
+	rc |= add_marker((unsigned long)__samode31, (unsigned long)__eamode31, "Amode31 Area");
+	rc |= add_marker(MODULES_VADDR, MODULES_END, "Modules Area");
+	rc |= add_marker(__abs_lowcore, __abs_lowcore + ABS_LOWCORE_MAP_SIZE, "Lowcore Area");
+	rc |= add_marker(__memcpy_real_area, __memcpy_real_area + MEMCPY_REAL_SIZE, "Real Memory Copy Area");
+	rc |= add_marker((unsigned long)vmemmap, (unsigned long)vmemmap + vmemmap_size, "vmemmap Area");
+	rc |= add_marker(VMALLOC_START, VMALLOC_END, "vmalloc Area");
 #ifdef CONFIG_KFENCE
-	address_markers[KFENCE_START_NR].start_address = kfence_start;
-	address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
+	rc |= add_marker(kfence_start, kfence_start + KFENCE_POOL_SIZE, "KFence Pool");
+#endif
+#ifdef CONFIG_KMSAN
+	rc |= add_marker(KMSAN_VMALLOC_SHADOW_START, KMSAN_VMALLOC_SHADOW_END, "Kmsan vmalloc Shadow");
+	rc |= add_marker(KMSAN_VMALLOC_ORIGIN_START, KMSAN_VMALLOC_ORIGIN_END, "Kmsan vmalloc Origins");
+	rc |= add_marker(KMSAN_MODULES_SHADOW_START, KMSAN_MODULES_SHADOW_END, "Kmsan Modules Shadow");
+	rc |= add_marker(KMSAN_MODULES_ORIGIN_START, KMSAN_MODULES_ORIGIN_END, "Kmsan Modules Origins");
+#endif
+#ifdef CONFIG_KASAN
+	rc |= add_marker(KASAN_SHADOW_START, KASAN_SHADOW_END, "Kasan Shadow");
 #endif
-	sort_address_markers();
+	if (rc)
+		goto error;
+	sort(&markers[1], markers_cnt - 1, sizeof(*markers), ptdump_cmp, NULL);
 #ifdef CONFIG_PTDUMP_DEBUGFS
 	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
 #endif /* CONFIG_PTDUMP_DEBUGFS */
 	return 0;
+error:
+	kvfree(markers);
+	return -ENOMEM;
 }
 device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 1e4d2187541a..7498e858c401 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -7,6 +7,7 @@
 #include <linux/panic.h>
 #include <asm/asm-extable.h>
 #include <asm/extable.h>
+#include <asm/fpu.h>
 
 const struct exception_table_entry *s390_search_extables(unsigned long addr)
 {
@@ -26,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r
 	return true;
 }
 
-static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs)
 {
 	unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
 
@@ -35,26 +36,83 @@ static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct p
 	return true;
 }
 
-static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+				   bool pair, struct pt_regs *regs)
 {
-	unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
 	unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
-	size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
 
 	regs->gprs[reg_err] = -EFAULT;
-	memset((void *)regs->gprs[reg_addr], 0, len);
+	regs->gprs[reg_zero] = 0;
+	if (pair)
+		regs->gprs[reg_zero + 1] = 0;
 	regs->psw.addr = extable_fixup(ex);
 	return true;
 }
 
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs)
 {
-	unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
-	unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+	unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned int reg_data = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+	unsigned long data, addr, offset;
 
-	regs->gprs[reg_err] = -EFAULT;
-	regs->gprs[reg_zero] = 0;
+	addr = regs->gprs[reg_addr];
+	offset = addr & (sizeof(unsigned long) - 1);
+	addr &= ~(sizeof(unsigned long) - 1);
+	data = *(unsigned long *)addr;
+	data <<= BITS_PER_BYTE * offset;
+	regs->gprs[reg_data] = data;
+	regs->psw.addr = extable_fixup(ex);
+	return true;
+}
+
+static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+	fpu_sfpc(0);
+	regs->psw.addr = extable_fixup(ex);
+	return true;
+}
+
+struct insn_ssf {
+	u64	opc1 : 8;
+	u64	r3   : 4;
+	u64	opc2 : 4;
+	u64	b1   : 4;
+	u64	d1   : 12;
+	u64	b2   : 4;
+	u64	d2   : 12;
+} __packed;
+
+static bool ex_handler_ua_mvcos(const struct exception_table_entry *ex,
+				bool from, struct pt_regs *regs)
+{
+	unsigned long uaddr, remainder;
+	struct insn_ssf *insn;
+
+	/*
+	 * If the faulting user space access crossed a page boundary, retry by
+	 * limiting the access to the first page (adjust length accordingly).
+	 * Then the mvcos instruction will either complete with condition code
+	 * zero, or generate another fault where the user space access did not
+	 * cross a page boundary.
+	 * If the faulting user space access did not cross a page boundary, set
+	 * length to zero and retry. In this case no user space access will
+	 * happen, and the mvcos instruction will complete with condition code
+	 * zero.
+	 * In both cases the instruction will complete with condition code
+	 * zero (copying finished), and the register which contains the
+	 * length indicates the number of bytes copied.
+	 */
 	regs->psw.addr = extable_fixup(ex);
+	insn = (struct insn_ssf *)regs->psw.addr;
+	if (from)
+		uaddr = regs->gprs[insn->b2] + insn->d2;
+	else
+		uaddr = regs->gprs[insn->b1] + insn->d1;
+	remainder = PAGE_SIZE - (uaddr & (PAGE_SIZE - 1));
+	if (regs->gprs[insn->r3] <= remainder)
+		remainder = 0;
+	regs->gprs[insn->r3] = remainder;
 	return true;
 }
 
@@ -70,12 +128,20 @@ bool fixup_exception(struct pt_regs *regs)
 		return ex_handler_fixup(ex, regs);
 	case EX_TYPE_BPF:
 		return ex_handler_bpf(ex, regs);
-	case EX_TYPE_UA_STORE:
-		return ex_handler_ua_store(ex, regs);
-	case EX_TYPE_UA_LOAD_MEM:
-		return ex_handler_ua_load_mem(ex, regs);
+	case EX_TYPE_UA_FAULT:
+		return ex_handler_ua_fault(ex, regs);
 	case EX_TYPE_UA_LOAD_REG:
-		return ex_handler_ua_load_reg(ex, regs);
+		return ex_handler_ua_load_reg(ex, false, regs);
+	case EX_TYPE_UA_LOAD_REGPAIR:
+		return ex_handler_ua_load_reg(ex, true, regs);
+	case EX_TYPE_ZEROPAD:
+		return ex_handler_zeropad(ex, regs);
+	case EX_TYPE_FPC:
+		return ex_handler_fpc(ex, regs);
+	case EX_TYPE_UA_MVCOS_TO:
+		return ex_handler_ua_mvcos(ex, false, regs);
+	case EX_TYPE_UA_MVCOS_FROM:
+		return ex_handler_ua_mvcos(ex, true, regs);
 	}
 	panic("invalid exception table entry");
 }
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 5060956b8e7d..f7da53e212f5 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -21,6 +21,7 @@
 #include <linux/ioport.h>
 #include <linux/refcount.h>
 #include <linux/pgtable.h>
+#include <asm/machine.h>
 #include <asm/diag.h>
 #include <asm/page.h>
 #include <asm/ebcdic.h>
@@ -28,6 +29,7 @@
 #include <asm/extmem.h>
 #include <asm/cpcmd.h>
 #include <asm/setup.h>
+#include <asm/asm.h>
 
 #define DCSS_PURGESEG   0x08
 #define DCSS_LOADSHRX	0x20
@@ -134,20 +136,21 @@ dcss_diag(int *func, void *parameter,
            unsigned long *ret1, unsigned long *ret2)
 {
 	unsigned long rx, ry;
-	int rc;
+	int cc;
 
-	rx = (unsigned long) parameter;
+	rx = virt_to_phys(parameter);
 	ry = (unsigned long) *func;
 
 	diag_stat_inc(DIAG_STAT_X064);
 	asm volatile(
-		"	diag	%0,%1,0x64\n"
-		"	ipm	%2\n"
-		"	srl	%2,28\n"
-		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+		"	diag	%[rx],%[ry],0x64\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [rx] "+d" (rx), [ry] "+d" (ry)
+		:
+		: CC_CLOBBER);
 	*ret1 = rx;
 	*ret2 = ry;
-	return rc;
+	return CC_TRANSFORM(cc);
 }
 
 static inline int
@@ -178,7 +181,7 @@ query_segment_type (struct dcss_segment *seg)
 
 	/* initialize diag input parameters */
 	qin->qopcode = DCSS_FINDSEGA;
-	qin->qoutptr = (unsigned long) qout;
+	qin->qoutptr = virt_to_phys(qout);
 	qin->qoutlen = sizeof(struct qout64);
 	memcpy (qin->qname, seg->dcss_name, 8);
 
@@ -253,7 +256,7 @@ segment_type (char* name)
 	int rc;
 	struct dcss_segment seg;
 
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return -ENOSYS;
 
 	dcss_mkname(name, seg.dcss_name);
@@ -289,15 +292,17 @@ segment_overlaps_others (struct dcss_segment *seg)
 
 /*
  * real segment loading function, called from segment_load
+ * Must return either an error code < 0, or the segment type code >= 0
  */
 static int
 __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
 {
 	unsigned long start_addr, end_addr, dummy;
 	struct dcss_segment *seg;
-	int rc, diag_cc;
+	int rc, diag_cc, segtype;
 
 	start_addr = end_addr = 0;
+	segtype = -1;
 	seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
 	if (seg == NULL) {
 		rc = -ENOMEM;
@@ -326,9 +331,9 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
 	seg->res_name[8] = '\0';
 	strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
 	seg->res->name = seg->res_name;
-	rc = seg->vm_segtype;
-	if (rc == SEG_TYPE_SC ||
-	    ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
+	segtype = seg->vm_segtype;
+	if (segtype == SEG_TYPE_SC ||
+	    ((segtype == SEG_TYPE_SR || segtype == SEG_TYPE_ER) && !do_nonshared))
 		seg->res->flags |= IORESOURCE_READONLY;
 
 	/* Check for overlapping resources before adding the mapping. */
@@ -386,7 +391,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
  out_free:
 	kfree(seg);
  out:
-	return rc;
+	return rc < 0 ? rc : segtype;
 }
 
 /*
@@ -414,7 +419,7 @@ segment_load (char *name, int do_nonshared, unsigned long *addr,
 	struct dcss_segment *seg;
 	int rc;
 
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return -ENOSYS;
 
 	mutex_lock(&dcss_lock);
@@ -525,6 +530,14 @@ segment_modify_shared (char *name, int do_nonshared)
 	return rc;
 }
 
+static void __dcss_diag_purge_on_cpu_0(void *data)
+{
+	struct dcss_segment *seg = (struct dcss_segment *)data;
+	unsigned long dummy;
+
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+}
+
 /*
  * Decrease the use count of a DCSS segment and remove
  * it from the address space if nobody is using it
@@ -533,10 +546,9 @@ segment_modify_shared (char *name, int do_nonshared)
 void
 segment_unload(char *name)
 {
-	unsigned long dummy;
 	struct dcss_segment *seg;
 
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return;
 
 	mutex_lock(&dcss_lock);
@@ -551,7 +563,14 @@ segment_unload(char *name)
 	kfree(seg->res);
 	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
 	list_del(&seg->list);
-	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	/*
+	 * Workaround for a z/VM issue, where calling the DCSS unload diag on
+	 * a non-IPL CPU would cause bogus sclp maximum memory detection on
+	 * the next IPL.
+	 * IPL CPU 0 cannot be set offline, so the dcss_diag() call can
+	 * directly be scheduled to that CPU.
+	 */
+	smp_call_function_single(0, __dcss_diag_purge_on_cpu_0, seg, 1);
 	kfree(seg);
 out_unlock:
 	mutex_unlock(&dcss_lock);
@@ -568,7 +587,7 @@ segment_save(char *name)
 	char cmd2[80];
 	int i, response;
 
-	if (!MACHINE_IS_VM)
+	if (!machine_is_vm())
 		return;
 
 	mutex_lock(&dcss_lock);
@@ -638,10 +657,13 @@ void segment_warning(int rc, char *seg_name)
 		pr_err("There is not enough memory to load or query "
 		       "DCSS %s\n", seg_name);
 		break;
-	case -ERANGE:
-		pr_err("DCSS %s exceeds the kernel mapping range (%lu) "
-		       "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS);
+	case -ERANGE: {
+		struct range mhp_range = arch_get_mappable_range();
+
+		pr_err("DCSS %s exceeds the kernel mapping range (%llu) "
+		       "and cannot be loaded\n", seg_name, mhp_range.end + 1);
 		break;
+	}
 	default:
 		break;
 	}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e173b6187ad5..e1ad05bfd28a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -3,13 +3,15 @@
  *  S390 version
  *    Copyright IBM Corp. 1999
  *    Author(s): Hartmut Penner (hp@de.ibm.com)
- *               Ulrich Weigand (uweigand@de.ibm.com)
+ *		 Ulrich Weigand (uweigand@de.ibm.com)
  *
  *  Derived from "arch/i386/mm/fault.c"
  *    Copyright (C) 1995  Linus Torvalds
  */
 
 #include <linux/kernel_stat.h>
+#include <linux/mmu_context.h>
+#include <linux/cpufeature.h>
 #include <linux/perf_event.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
@@ -32,123 +34,93 @@
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
 #include <linux/kfence.h>
+#include <linux/pagewalk.h>
 #include <asm/asm-extable.h>
 #include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/fault.h>
 #include <asm/diag.h>
-#include <asm/gmap.h>
 #include <asm/irq.h>
-#include <asm/mmu_context.h>
 #include <asm/facility.h>
 #include <asm/uv.h>
 #include "../kernel/entry.h"
 
-#define __FAIL_ADDR_MASK -4096L
-#define __SUBCODE_MASK 0x0600
-#define __PF_RES_FIELD 0x8000000000000000ULL
-
-#define VM_FAULT_BADCONTEXT	((__force vm_fault_t) 0x010000)
-#define VM_FAULT_BADMAP		((__force vm_fault_t) 0x020000)
-#define VM_FAULT_BADACCESS	((__force vm_fault_t) 0x040000)
-#define VM_FAULT_SIGNAL		((__force vm_fault_t) 0x080000)
-#define VM_FAULT_PFAULT		((__force vm_fault_t) 0x100000)
-
-enum fault_type {
-	KERNEL_FAULT,
-	USER_FAULT,
-	GMAP_FAULT,
-};
-
-static unsigned long store_indication __read_mostly;
-
-static int __init fault_init(void)
-{
-	if (test_facility(75))
-		store_indication = 0xc00;
-	return 0;
-}
-early_initcall(fault_init);
-
 /*
  * Find out which address space caused the exception.
  */
-static enum fault_type get_fault_type(struct pt_regs *regs)
+static bool is_kernel_fault(struct pt_regs *regs)
 {
-	unsigned long trans_exc_code;
+	union teid teid = { .val = regs->int_parm_long };
 
-	trans_exc_code = regs->int_parm_long & 3;
-	if (likely(trans_exc_code == 0)) {
-		/* primary space exception */
-		if (user_mode(regs))
-			return USER_FAULT;
-		if (!IS_ENABLED(CONFIG_PGSTE))
-			return KERNEL_FAULT;
-		if (test_pt_regs_flag(regs, PIF_GUEST_FAULT))
-			return GMAP_FAULT;
-		return KERNEL_FAULT;
-	}
-	if (trans_exc_code == 2)
-		return USER_FAULT;
-	if (trans_exc_code == 1) {
-		/* access register mode, not used in the kernel */
-		return USER_FAULT;
-	}
-	/* home space exception -> access via kernel ASCE */
-	return KERNEL_FAULT;
+	if (user_mode(regs))
+		return false;
+	if (teid.as == PSW_BITS_AS_SECONDARY)
+		return false;
+	return true;
 }
 
-static int bad_address(void *p)
+static unsigned long get_fault_address(struct pt_regs *regs)
 {
-	unsigned long dummy;
+	union teid teid = { .val = regs->int_parm_long };
 
-	return get_kernel_nofault(dummy, (unsigned long *)p);
+	return teid.addr * PAGE_SIZE;
+}
+
+static __always_inline bool fault_is_write(struct pt_regs *regs)
+{
+	union teid teid = { .val = regs->int_parm_long };
+
+	if (test_facility(75))
+		return teid.fsi == TEID_FSI_STORE;
+	return false;
 }
 
 static void dump_pagetable(unsigned long asce, unsigned long address)
 {
-	unsigned long *table = __va(asce & _ASCE_ORIGIN);
+	unsigned long entry, *table = __va(asce & _ASCE_ORIGIN);
 
 	pr_alert("AS:%016lx ", asce);
 	switch (asce & _ASCE_TYPE_MASK) {
 	case _ASCE_TYPE_REGION1:
 		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
-		if (bad_address(table))
+		if (get_kernel_nofault(entry, table))
 			goto bad;
-		pr_cont("R1:%016lx ", *table);
-		if (*table & _REGION_ENTRY_INVALID)
+		pr_cont("R1:%016lx ", entry);
+		if (entry & _REGION_ENTRY_INVALID)
 			goto out;
-		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(entry & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
-		if (bad_address(table))
+		if (get_kernel_nofault(entry, table))
 			goto bad;
-		pr_cont("R2:%016lx ", *table);
-		if (*table & _REGION_ENTRY_INVALID)
+		pr_cont("R2:%016lx ", entry);
+		if (entry & _REGION_ENTRY_INVALID)
 			goto out;
-		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(entry & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
-		if (bad_address(table))
+		if (get_kernel_nofault(entry, table))
 			goto bad;
-		pr_cont("R3:%016lx ", *table);
-		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+		pr_cont("R3:%016lx ", entry);
+		if (entry & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
 			goto out;
-		table = __va(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(entry & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
-		if (bad_address(table))
+		if (get_kernel_nofault(entry, table))
 			goto bad;
-		pr_cont("S:%016lx ", *table);
-		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+		pr_cont("S:%016lx ", entry);
+		if (entry & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
 			goto out;
-		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
+		table = __va(entry & _SEGMENT_ENTRY_ORIGIN);
 	}
-	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
-	if (bad_address(table))
+	table += (address & _PAGE_INDEX) >> PAGE_SHIFT;
+	if (get_kernel_nofault(entry, table))
 		goto bad;
-	pr_cont("P:%016lx ", *table);
+	pr_cont("P:%016lx ", entry);
 out:
 	pr_cont("\n");
 	return;
@@ -158,163 +130,118 @@ bad:
 
 static void dump_fault_info(struct pt_regs *regs)
 {
+	union teid teid = { .val = regs->int_parm_long };
 	unsigned long asce;
 
 	pr_alert("Failing address: %016lx TEID: %016lx\n",
-		 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+		 get_fault_address(regs), teid.val);
 	pr_alert("Fault in ");
-	switch (regs->int_parm_long & 3) {
-	case 3:
+	switch (teid.as) {
+	case PSW_BITS_AS_HOME:
 		pr_cont("home space ");
 		break;
-	case 2:
+	case PSW_BITS_AS_SECONDARY:
 		pr_cont("secondary space ");
 		break;
-	case 1:
+	case PSW_BITS_AS_ACCREG:
 		pr_cont("access register ");
 		break;
-	case 0:
+	case PSW_BITS_AS_PRIMARY:
 		pr_cont("primary space ");
 		break;
 	}
 	pr_cont("mode while using ");
-	switch (get_fault_type(regs)) {
-	case USER_FAULT:
-		asce = S390_lowcore.user_asce;
-		pr_cont("user ");
-		break;
-	case GMAP_FAULT:
-		asce = ((struct gmap *) S390_lowcore.gmap)->asce;
-		pr_cont("gmap ");
-		break;
-	case KERNEL_FAULT:
-		asce = S390_lowcore.kernel_asce;
+	if (is_kernel_fault(regs)) {
+		asce = get_lowcore()->kernel_asce.val;
 		pr_cont("kernel ");
-		break;
-	default:
-		unreachable();
+	} else {
+		asce = get_lowcore()->user_asce.val;
+		pr_cont("user ");
 	}
 	pr_cont("ASCE.\n");
-	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+	dump_pagetable(asce, get_fault_address(regs));
 }
 
 int show_unhandled_signals = 1;
 
+static const struct ctl_table s390_fault_sysctl_table[] = {
+	{
+		.procname	= "userprocess_debug",
+		.data		= &show_unhandled_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+};
+
+static int __init init_s390_fault_sysctls(void)
+{
+	register_sysctl_init("kernel", s390_fault_sysctl_table);
+	return 0;
+}
+arch_initcall(init_s390_fault_sysctls);
+
 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
 {
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
+
 	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
 		return;
 	if (!unhandled_signal(current, signr))
 		return;
-	if (!printk_ratelimit())
+	if (!__ratelimit(&rs))
 		return;
-	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
-	       regs->int_code & 0xffff, regs->int_code >> 17);
+	pr_alert("User process fault: interruption code %04x ilc:%d ",
+		 regs->int_code & 0xffff, regs->int_code >> 17);
 	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
-	printk(KERN_CONT "\n");
+	pr_cont("\n");
 	if (is_mm_fault)
 		dump_fault_info(regs);
 	show_regs(regs);
 }
 
-/*
- * Send SIGSEGV to task.  This is an external routine
- * to keep the stack usage of do_page_fault small.
- */
-static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
+static void do_sigsegv(struct pt_regs *regs, int si_code)
 {
 	report_user_fault(regs, SIGSEGV, 1);
-	force_sig_fault(SIGSEGV, si_code,
-			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
+	force_sig_fault(SIGSEGV, si_code, (void __user *)get_fault_address(regs));
 }
 
-static noinline void do_no_context(struct pt_regs *regs)
+static void handle_fault_error_nolock(struct pt_regs *regs, int si_code)
 {
+	unsigned long address;
+	bool is_write;
+
+	if (user_mode(regs)) {
+		if (WARN_ON_ONCE(!si_code))
+			si_code = SEGV_MAPERR;
+		return do_sigsegv(regs, si_code);
+	}
 	if (fixup_exception(regs))
 		return;
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice.
-	 */
-	if (get_fault_type(regs) == KERNEL_FAULT)
-		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
-		       " in virtual kernel address space\n");
-	else
-		printk(KERN_ALERT "Unable to handle kernel paging request"
-		       " in virtual user address space\n");
+	if (is_kernel_fault(regs)) {
+		address = get_fault_address(regs);
+		is_write = fault_is_write(regs);
+		if (kfence_handle_page_fault(address, is_write, regs))
+			return;
+		pr_alert("Unable to handle kernel pointer dereference in virtual kernel address space\n");
+	} else {
+		pr_alert("Unable to handle kernel paging request in virtual user address space\n");
+	}
 	dump_fault_info(regs);
 	die(regs, "Oops");
 }
 
-static noinline void do_low_address(struct pt_regs *regs)
+static void handle_fault_error(struct pt_regs *regs, int si_code)
 {
-	/* Low-address protection hit in kernel mode means
-	   NULL pointer write access in kernel mode.  */
-	if (regs->psw.mask & PSW_MASK_PSTATE) {
-		/* Low-address protection hit in user mode 'cannot happen'. */
-		die (regs, "Low-address protection");
-	}
+	struct mm_struct *mm = current->mm;
 
-	do_no_context(regs);
+	mmap_read_unlock(mm);
+	handle_fault_error_nolock(regs, si_code);
 }
 
-static noinline void do_sigbus(struct pt_regs *regs)
+static void do_sigbus(struct pt_regs *regs)
 {
-	/*
-	 * Send a sigbus, regardless of whether we were in kernel
-	 * or user mode.
-	 */
-	force_sig_fault(SIGBUS, BUS_ADRERR,
-			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
-}
-
-static noinline void do_fault_error(struct pt_regs *regs, int access,
-					vm_fault_t fault)
-{
-	int si_code;
-
-	switch (fault) {
-	case VM_FAULT_BADACCESS:
-	case VM_FAULT_BADMAP:
-		/* Bad memory access. Check if it is kernel or user space. */
-		if (user_mode(regs)) {
-			/* User mode accesses just cause a SIGSEGV */
-			si_code = (fault == VM_FAULT_BADMAP) ?
-				SEGV_MAPERR : SEGV_ACCERR;
-			do_sigsegv(regs, si_code);
-			break;
-		}
-		fallthrough;
-	case VM_FAULT_BADCONTEXT:
-	case VM_FAULT_PFAULT:
-		do_no_context(regs);
-		break;
-	case VM_FAULT_SIGNAL:
-		if (!user_mode(regs))
-			do_no_context(regs);
-		break;
-	default: /* fault & VM_FAULT_ERROR */
-		if (fault & VM_FAULT_OOM) {
-			if (!user_mode(regs))
-				do_no_context(regs);
-			else
-				pagefault_out_of_memory();
-		} else if (fault & VM_FAULT_SIGSEGV) {
-			/* Kernel mode? Handle exceptions or die */
-			if (!user_mode(regs))
-				do_no_context(regs);
-			else
-				do_sigsegv(regs, SEGV_MAPERR);
-		} else if (fault & VM_FAULT_SIGBUS) {
-			/* Kernel mode? Handle exceptions or die */
-			if (!user_mode(regs))
-				do_no_context(regs);
-			else
-				do_sigbus(regs);
-		} else
-			BUG();
-		break;
-	}
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)get_fault_address(regs));
 }
 
 /*
@@ -323,161 +250,116 @@ static noinline void do_fault_error(struct pt_regs *regs, int access,
  * routines.
  *
  * interruption code (int_code):
- *   04       Protection           ->  Write-Protection  (suppression)
- *   10       Segment translation  ->  Not present       (nullification)
- *   11       Page translation     ->  Not present       (nullification)
- *   3b       Region third trans.  ->  Not present       (nullification)
+ *   04       Protection	   ->  Write-Protection  (suppression)
+ *   10       Segment translation  ->  Not present	 (nullification)
+ *   11       Page translation	   ->  Not present	 (nullification)
+ *   3b       Region third trans.  ->  Not present	 (nullification)
  */
-static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
+static void do_exception(struct pt_regs *regs, int access)
 {
-	struct gmap *gmap;
-	struct task_struct *tsk;
-	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	enum fault_type type;
-	unsigned long trans_exc_code;
 	unsigned long address;
+	struct mm_struct *mm;
 	unsigned int flags;
 	vm_fault_t fault;
 	bool is_write;
 
-	tsk = current;
 	/*
 	 * The instruction that caused the program check has
 	 * been nullified. Don't signal single step via SIGTRAP.
 	 */
 	clear_thread_flag(TIF_PER_TRAP);
-
 	if (kprobe_page_fault(regs, 14))
-		return 0;
-
-	mm = tsk->mm;
-	trans_exc_code = regs->int_parm_long;
-	address = trans_exc_code & __FAIL_ADDR_MASK;
-	is_write = (trans_exc_code & store_indication) == 0x400;
-
-	/*
-	 * Verify that the fault happened in user space, that
-	 * we are not in an interrupt and that there is a 
-	 * user context.
-	 */
-	fault = VM_FAULT_BADCONTEXT;
-	type = get_fault_type(regs);
-	switch (type) {
-	case KERNEL_FAULT:
-		if (kfence_handle_page_fault(address, is_write, regs))
-			return 0;
-		goto out;
-	case USER_FAULT:
-	case GMAP_FAULT:
-		if (faulthandler_disabled() || !mm)
-			goto out;
-		break;
-	}
-
+		return;
+	mm = current->mm;
+	address = get_fault_address(regs);
+	is_write = fault_is_write(regs);
+	if (is_kernel_fault(regs) || faulthandler_disabled() || !mm)
+		return handle_fault_error_nolock(regs, 0);
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_DEFAULT;
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
-	if (access == VM_WRITE || is_write)
+	if (is_write)
+		access = VM_WRITE;
+	if (access == VM_WRITE)
 		flags |= FAULT_FLAG_WRITE;
-	mmap_read_lock(mm);
-
-	gmap = NULL;
-	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
-		gmap = (struct gmap *) S390_lowcore.gmap;
-		current->thread.gmap_addr = address;
-		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
-		current->thread.gmap_int_code = regs->int_code & 0xffff;
-		address = __gmap_translate(gmap, address);
-		if (address == -EFAULT) {
-			fault = VM_FAULT_BADMAP;
-			goto out_up;
-		}
-		if (gmap->pfault_enabled)
-			flags |= FAULT_FLAG_RETRY_NOWAIT;
+	if (!(flags & FAULT_FLAG_USER))
+		goto lock_mmap;
+	vma = lock_vma_under_rcu(mm, address);
+	if (!vma)
+		goto lock_mmap;
+	if (!(vma->vm_flags & access)) {
+		vma_end_read(vma);
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		return handle_fault_error_nolock(regs, SEGV_ACCERR);
 	}
-
+	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
+	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+		vma_end_read(vma);
+	if (!(fault & VM_FAULT_RETRY)) {
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		goto done;
+	}
+	count_vm_vma_lock_event(VMA_LOCK_RETRY);
+	if (fault & VM_FAULT_MAJOR)
+		flags |= FAULT_FLAG_TRIED;
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			handle_fault_error_nolock(regs, 0);
+		return;
+	}
+lock_mmap:
 retry:
-	fault = VM_FAULT_BADMAP;
-	vma = find_vma(mm, address);
+	vma = lock_mm_and_find_vma(mm, address, regs);
 	if (!vma)
-		goto out_up;
-
-	if (unlikely(vma->vm_start > address)) {
-		if (!(vma->vm_flags & VM_GROWSDOWN))
-			goto out_up;
-		if (expand_stack(vma, address))
-			goto out_up;
-	}
-
-	/*
-	 * Ok, we have a good vm_area for this memory access, so
-	 * we can handle it..
-	 */
-	fault = VM_FAULT_BADACCESS;
+		return handle_fault_error_nolock(regs, SEGV_MAPERR);
 	if (unlikely(!(vma->vm_flags & access)))
-		goto out_up;
-
-	if (is_vm_hugetlb_page(vma))
-		address &= HPAGE_MASK;
-	/*
-	 * If for any reason at all we couldn't handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
-	 */
+		return handle_fault_error(regs, SEGV_ACCERR);
 	fault = handle_mm_fault(vma, address, flags, regs);
 	if (fault_signal_pending(fault, regs)) {
-		fault = VM_FAULT_SIGNAL;
-		if (flags & FAULT_FLAG_RETRY_NOWAIT)
-			goto out_up;
-		goto out;
+		if (!user_mode(regs))
+			handle_fault_error_nolock(regs, 0);
+		return;
 	}
-	if (unlikely(fault & VM_FAULT_ERROR))
-		goto out_up;
-
+	/* The fault is fully completed (including releasing mmap lock) */
+	if (fault & VM_FAULT_COMPLETED)
+		return;
 	if (fault & VM_FAULT_RETRY) {
-		if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
-			(flags & FAULT_FLAG_RETRY_NOWAIT)) {
-			/*
-			 * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
-			 * not been released
-			 */
-			current->thread.gmap_pfault = 1;
-			fault = VM_FAULT_PFAULT;
-			goto out_up;
-		}
-		flags &= ~FAULT_FLAG_RETRY_NOWAIT;
 		flags |= FAULT_FLAG_TRIED;
-		mmap_read_lock(mm);
 		goto retry;
 	}
-	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
-		address =  __gmap_link(gmap, current->thread.gmap_addr,
-				       address);
-		if (address == -EFAULT) {
-			fault = VM_FAULT_BADMAP;
-			goto out_up;
-		}
-		if (address == -ENOMEM) {
-			fault = VM_FAULT_OOM;
-			goto out_up;
-		}
-	}
-	fault = 0;
-out_up:
 	mmap_read_unlock(mm);
-out:
-	return fault;
+done:
+	if (!(fault & VM_FAULT_ERROR))
+		return;
+	if (fault & VM_FAULT_OOM) {
+		if (!user_mode(regs))
+			handle_fault_error_nolock(regs, 0);
+		else
+			pagefault_out_of_memory();
+	} else if (fault & VM_FAULT_SIGSEGV) {
+		if (!user_mode(regs))
+			handle_fault_error_nolock(regs, 0);
+		else
+			do_sigsegv(regs, SEGV_MAPERR);
+	} else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON |
+			    VM_FAULT_HWPOISON_LARGE)) {
+		if (!user_mode(regs))
+			handle_fault_error_nolock(regs, 0);
+		else
+			do_sigbus(regs);
+	} else {
+		pr_emerg("Unexpected fault flags: %08x\n", fault);
+		BUG();
+	}
 }
 
 void do_protection_exception(struct pt_regs *regs)
 {
-	unsigned long trans_exc_code;
-	int access;
-	vm_fault_t fault;
+	union teid teid = { .val = regs->int_parm_long };
 
-	trans_exc_code = regs->int_parm_long;
 	/*
 	 * Protection exceptions are suppressing, decrement psw address.
 	 * The exception to this rule are aborted transactions, for these
@@ -490,281 +372,51 @@ void do_protection_exception(struct pt_regs *regs)
 	 * as a special case because the translation exception code
 	 * field is not guaranteed to contain valid data in this case.
 	 */
-	if (unlikely(!(trans_exc_code & 4))) {
-		do_low_address(regs);
-		return;
+	if (unlikely(!teid.b61)) {
+		if (user_mode(regs)) {
+			/* Low-address protection in user mode: cannot happen */
+			dump_fault_info(regs);
+			die(regs, "Low-address protection");
+		}
+		/*
+		 * Low-address protection in kernel mode means
+		 * NULL pointer write access in kernel mode.
+		 */
+		return handle_fault_error_nolock(regs, 0);
 	}
-	if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
-		regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
-					(regs->psw.addr & PAGE_MASK);
-		access = VM_EXEC;
-		fault = VM_FAULT_BADACCESS;
-	} else {
-		access = VM_WRITE;
-		fault = do_exception(regs, access);
+	if (unlikely(cpu_has_nx() && teid.b56)) {
+		regs->int_parm_long = (teid.addr * PAGE_SIZE) | (regs->psw.addr & PAGE_MASK);
+		return handle_fault_error_nolock(regs, SEGV_ACCERR);
 	}
-	if (unlikely(fault))
-		do_fault_error(regs, access, fault);
+	do_exception(regs, VM_WRITE);
 }
 NOKPROBE_SYMBOL(do_protection_exception);
 
 void do_dat_exception(struct pt_regs *regs)
 {
-	int access;
-	vm_fault_t fault;
-
-	access = VM_ACCESS_FLAGS;
-	fault = do_exception(regs, access);
-	if (unlikely(fault))
-		do_fault_error(regs, access, fault);
+	do_exception(regs, VM_ACCESS_FLAGS);
 }
 NOKPROBE_SYMBOL(do_dat_exception);
 
-#ifdef CONFIG_PFAULT 
-/*
- * 'pfault' pseudo page faults routines.
- */
-static int pfault_disable;
-
-static int __init nopfault(char *str)
-{
-	pfault_disable = 1;
-	return 1;
-}
-
-__setup("nopfault", nopfault);
-
-struct pfault_refbk {
-	u16 refdiagc;
-	u16 reffcode;
-	u16 refdwlen;
-	u16 refversn;
-	u64 refgaddr;
-	u64 refselmk;
-	u64 refcmpmk;
-	u64 reserved;
-} __attribute__ ((packed, aligned(8)));
-
-static struct pfault_refbk pfault_init_refbk = {
-	.refdiagc = 0x258,
-	.reffcode = 0,
-	.refdwlen = 5,
-	.refversn = 2,
-	.refgaddr = __LC_LPP,
-	.refselmk = 1ULL << 48,
-	.refcmpmk = 1ULL << 48,
-	.reserved = __PF_RES_FIELD
-};
-
-int pfault_init(void)
-{
-        int rc;
-
-	if (pfault_disable)
-		return -1;
-	diag_stat_inc(DIAG_STAT_X258);
-	asm volatile(
-		"	diag	%1,%0,0x258\n"
-		"0:	j	2f\n"
-		"1:	la	%0,8\n"
-		"2:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc)
-		: "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
-        return rc;
-}
-
-static struct pfault_refbk pfault_fini_refbk = {
-	.refdiagc = 0x258,
-	.reffcode = 1,
-	.refdwlen = 5,
-	.refversn = 2,
-};
-
-void pfault_fini(void)
-{
-
-	if (pfault_disable)
-		return;
-	diag_stat_inc(DIAG_STAT_X258);
-	asm volatile(
-		"	diag	%0,0,0x258\n"
-		"0:	nopr	%%r7\n"
-		EX_TABLE(0b,0b)
-		: : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
-}
-
-static DEFINE_SPINLOCK(pfault_lock);
-static LIST_HEAD(pfault_list);
-
-#define PF_COMPLETE	0x0080
-
-/*
- * The mechanism of our pfault code: if Linux is running as guest, runs a user
- * space process and the user space process accesses a page that the host has
- * paged out we get a pfault interrupt.
- *
- * This allows us, within the guest, to schedule a different process. Without
- * this mechanism the host would have to suspend the whole virtual cpu until
- * the page has been paged in.
- *
- * So when we get such an interrupt then we set the state of the current task
- * to uninterruptible and also set the need_resched flag. Both happens within
- * interrupt context(!). If we later on want to return to user space we
- * recognize the need_resched flag and then call schedule().  It's not very
- * obvious how this works...
- *
- * Of course we have a lot of additional fun with the completion interrupt (->
- * host signals that a page of a process has been paged in and the process can
- * continue to run). This interrupt can arrive on any cpu and, since we have
- * virtual cpus, actually appear before the interrupt that signals that a page
- * is missing.
- */
-static void pfault_interrupt(struct ext_code ext_code,
-			     unsigned int param32, unsigned long param64)
-{
-	struct task_struct *tsk;
-	__u16 subcode;
-	pid_t pid;
-
-	/*
-	 * Get the external interruption subcode & pfault initial/completion
-	 * signal bit. VM stores this in the 'cpu address' field associated
-	 * with the external interrupt.
-	 */
-	subcode = ext_code.subcode;
-	if ((subcode & 0xff00) != __SUBCODE_MASK)
-		return;
-	inc_irq_stat(IRQEXT_PFL);
-	/* Get the token (= pid of the affected task). */
-	pid = param64 & LPP_PID_MASK;
-	rcu_read_lock();
-	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
-	if (tsk)
-		get_task_struct(tsk);
-	rcu_read_unlock();
-	if (!tsk)
-		return;
-	spin_lock(&pfault_lock);
-	if (subcode & PF_COMPLETE) {
-		/* signal bit is set -> a page has been swapped in by VM */
-		if (tsk->thread.pfault_wait == 1) {
-			/* Initial interrupt was faster than the completion
-			 * interrupt. pfault_wait is valid. Set pfault_wait
-			 * back to zero and wake up the process. This can
-			 * safely be done because the task is still sleeping
-			 * and can't produce new pfaults. */
-			tsk->thread.pfault_wait = 0;
-			list_del(&tsk->thread.list);
-			wake_up_process(tsk);
-			put_task_struct(tsk);
-		} else {
-			/* Completion interrupt was faster than initial
-			 * interrupt. Set pfault_wait to -1 so the initial
-			 * interrupt doesn't put the task to sleep.
-			 * If the task is not running, ignore the completion
-			 * interrupt since it must be a leftover of a PFAULT
-			 * CANCEL operation which didn't remove all pending
-			 * completion interrupts. */
-			if (task_is_running(tsk))
-				tsk->thread.pfault_wait = -1;
-		}
-	} else {
-		/* signal bit not set -> a real page is missing. */
-		if (WARN_ON_ONCE(tsk != current))
-			goto out;
-		if (tsk->thread.pfault_wait == 1) {
-			/* Already on the list with a reference: put to sleep */
-			goto block;
-		} else if (tsk->thread.pfault_wait == -1) {
-			/* Completion interrupt was faster than the initial
-			 * interrupt (pfault_wait == -1). Set pfault_wait
-			 * back to zero and exit. */
-			tsk->thread.pfault_wait = 0;
-		} else {
-			/* Initial interrupt arrived before completion
-			 * interrupt. Let the task sleep.
-			 * An extra task reference is needed since a different
-			 * cpu may set the task state to TASK_RUNNING again
-			 * before the scheduler is reached. */
-			get_task_struct(tsk);
-			tsk->thread.pfault_wait = 1;
-			list_add(&tsk->thread.list, &pfault_list);
-block:
-			/* Since this must be a userspace fault, there
-			 * is no kernel task state to trample. Rely on the
-			 * return to userspace schedule() to block. */
-			__set_current_state(TASK_UNINTERRUPTIBLE);
-			set_tsk_need_resched(tsk);
-			set_preempt_need_resched();
-		}
-	}
-out:
-	spin_unlock(&pfault_lock);
-	put_task_struct(tsk);
-}
-
-static int pfault_cpu_dead(unsigned int cpu)
-{
-	struct thread_struct *thread, *next;
-	struct task_struct *tsk;
-
-	spin_lock_irq(&pfault_lock);
-	list_for_each_entry_safe(thread, next, &pfault_list, list) {
-		thread->pfault_wait = 0;
-		list_del(&thread->list);
-		tsk = container_of(thread, struct task_struct, thread);
-		wake_up_process(tsk);
-		put_task_struct(tsk);
-	}
-	spin_unlock_irq(&pfault_lock);
-	return 0;
-}
-
-static int __init pfault_irq_init(void)
-{
-	int rc;
-
-	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
-	if (rc)
-		goto out_extint;
-	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
-	if (rc)
-		goto out_pfault;
-	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
-	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
-				  NULL, pfault_cpu_dead);
-	return 0;
-
-out_pfault:
-	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
-out_extint:
-	pfault_disable = 1;
-	return rc;
-}
-early_initcall(pfault_irq_init);
-
-#endif /* CONFIG_PFAULT */
-
 #if IS_ENABLED(CONFIG_PGSTE)
 
 void do_secure_storage_access(struct pt_regs *regs)
 {
-	unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	union teid teid = { .val = regs->int_parm_long };
+	unsigned long addr = get_fault_address(regs);
 	struct vm_area_struct *vma;
+	struct folio_walk fw;
 	struct mm_struct *mm;
-	struct page *page;
+	struct folio *folio;
 	int rc;
 
 	/*
-	 * bit 61 tells us if the address is valid, if it's not we
-	 * have a major problem and should stop the kernel or send a
-	 * SIGSEGV to the process. Unfortunately bit 61 is not
-	 * reliable without the misc UV feature so we need to check
-	 * for that as well.
+	 * Bit 61 indicates whether the address is valid. If it is not, the
+	 * kernel should be stopped or SIGSEGV should be sent to the process.
+	 * Bit 61 is not reliable without the misc UV feature, therefore
+	 * the presence of that feature needs to be checked too.
 	 */
-	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
-	    !test_bit_inv(61, &regs->int_parm_long)) {
+	if (uv_has_feature(BIT_UV_FEAT_MISC) && !teid.b61) {
 		/*
 		 * When this happens, userspace did something that it
 		 * was not supposed to do, e.g. branching into secure
@@ -774,80 +426,43 @@ void do_secure_storage_access(struct pt_regs *regs)
 			send_sig(SIGSEGV, current, 0);
 			return;
 		}
-
 		/*
-		 * The kernel should never run into this case and we
-		 * have no way out of this situation.
+		 * The kernel should never run into this case and
+		 * there is no way out of this situation.
 		 */
 		panic("Unexpected PGM 0x3d with TEID bit 61=0");
 	}
-
-	switch (get_fault_type(regs)) {
-	case USER_FAULT:
+	if (is_kernel_fault(regs)) {
+		folio = phys_to_folio(addr);
+		if (unlikely(!folio_try_get(folio)))
+			return;
+		rc = arch_make_folio_accessible(folio);
+		folio_put(folio);
+		if (rc)
+			BUG();
+	} else {
+		if (faulthandler_disabled())
+			return handle_fault_error_nolock(regs, 0);
 		mm = current->mm;
 		mmap_read_lock(mm);
 		vma = find_vma(mm, addr);
-		if (!vma) {
-			mmap_read_unlock(mm);
-			do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
-			break;
-		}
-		page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
-		if (IS_ERR_OR_NULL(page)) {
+		if (!vma)
+			return handle_fault_error(regs, SEGV_MAPERR);
+		folio = folio_walk_start(&fw, vma, addr, 0);
+		if (!folio) {
 			mmap_read_unlock(mm);
-			break;
+			return;
 		}
-		if (arch_make_page_accessible(page))
+		/* arch_make_folio_accessible() needs a raised refcount. */
+		folio_get(folio);
+		rc = arch_make_folio_accessible(folio);
+		folio_put(folio);
+		folio_walk_end(&fw, vma);
+		if (rc)
 			send_sig(SIGSEGV, current, 0);
-		put_page(page);
 		mmap_read_unlock(mm);
-		break;
-	case KERNEL_FAULT:
-		page = phys_to_page(addr);
-		if (unlikely(!try_get_page(page)))
-			break;
-		rc = arch_make_page_accessible(page);
-		put_page(page);
-		if (rc)
-			BUG();
-		break;
-	case GMAP_FAULT:
-	default:
-		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
-		WARN_ON_ONCE(1);
 	}
 }
 NOKPROBE_SYMBOL(do_secure_storage_access);
 
-void do_non_secure_storage_access(struct pt_regs *regs)
-{
-	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
-	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
-
-	if (get_fault_type(regs) != GMAP_FAULT) {
-		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
-		send_sig(SIGSEGV, current, 0);
-}
-NOKPROBE_SYMBOL(do_non_secure_storage_access);
-
-void do_secure_storage_violation(struct pt_regs *regs)
-{
-	/*
-	 * Either KVM messed up the secure guest mapping or the same
-	 * page is mapped into multiple secure guests.
-	 *
-	 * This exception is only triggered when a guest 2 is running
-	 * and can therefore never occur in kernel context.
-	 */
-	printk_ratelimited(KERN_WARNING
-			   "Secure storage violation in task: %s, pid %d\n",
-			   current->comm, current->pid);
-	send_sig(SIGSEGV, current, 0);
-}
-
 #endif /* CONFIG_PGSTE */
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b8ae4a4aa2ba..012a4366a2ad 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -8,6 +8,7 @@
  *		 Janosch Frank <frankja@linux.vnet.ibm.com>
  */
 
+#include <linux/cpufeature.h>
 #include <linux/kernel.h>
 #include <linux/pagewalk.h>
 #include <linux/swap.h>
@@ -18,20 +19,43 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 #include <linux/pgtable.h>
-
+#include <asm/page-states.h>
 #include <asm/pgalloc.h>
+#include <asm/machine.h>
+#include <asm/gmap_helpers.h>
 #include <asm/gmap.h>
-#include <asm/tlb.h>
+#include <asm/page.h>
+
+/*
+ * The address is saved in a radix tree directly; NULL would be ambiguous,
+ * since 0 is a valid address, and NULL is returned when nothing was found.
+ * The lower bits are ignored by all users of the macro, so it can be used
+ * to distinguish a valid address 0 from a NULL.
+ */
+#define VALID_GADDR_FLAG 1
+#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
+#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)
 
 #define GMAP_SHADOW_FAKE_TABLE 1ULL
 
+static struct page *gmap_alloc_crst(void)
+{
+	struct page *page;
+
+	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	if (!page)
+		return NULL;
+	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
+	return page;
+}
+
 /**
  * gmap_alloc - allocate and initialize a guest address space
  * @limit: maximum address of the gmap address space
  *
  * Returns a guest address space structure.
  */
-static struct gmap *gmap_alloc(unsigned long limit)
+struct gmap *gmap_alloc(unsigned long limit)
 {
 	struct gmap *gmap;
 	struct page *page;
@@ -58,21 +82,17 @@ static struct gmap *gmap_alloc(unsigned long limit)
 	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
 	if (!gmap)
 		goto out;
-	INIT_LIST_HEAD(&gmap->crst_list);
 	INIT_LIST_HEAD(&gmap->children);
-	INIT_LIST_HEAD(&gmap->pt_list);
 	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
 	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
 	spin_lock_init(&gmap->guest_table_lock);
 	spin_lock_init(&gmap->shadow_lock);
 	refcount_set(&gmap->ref_count, 1);
-	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	page = gmap_alloc_crst();
 	if (!page)
 		goto out_free;
-	page->index = 0;
-	list_add(&page->lru, &gmap->crst_list);
-	table = (unsigned long *) page_to_phys(page);
+	table = page_to_virt(page);
 	crst_table_init(table, etype);
 	gmap->table = table;
 	gmap->asce = atype | _ASCE_TABLE_LENGTH |
@@ -85,6 +105,7 @@ out_free:
 out:
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(gmap_alloc);
 
 /**
  * gmap_create - create a guest address space
@@ -116,7 +137,7 @@ EXPORT_SYMBOL_GPL(gmap_create);
 
 static void gmap_flush_tlb(struct gmap *gmap)
 {
-	if (MACHINE_HAS_IDTE)
+	if (cpu_has_idte())
 		__tlb_flush_idte(gmap->asce);
 	else
 		__tlb_flush_global();
@@ -173,30 +194,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
 	} while (nr > 0);
 }
 
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+	bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+	int i;
+
+	if (is_segment) {
+		if (!free_ptes)
+			goto out;
+		for (i = 0; i < _CRST_ENTRIES; i++)
+			if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+				page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+	} else {
+		for (i = 0; i < _CRST_ENTRIES; i++)
+			if (!(table[i] & _REGION_ENTRY_INVALID))
+				gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+	}
+
+out:
+	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
 /**
  * gmap_free - free a guest address space
  * @gmap: pointer to the guest address space structure
  *
  * No locks required. There are no references to this gmap anymore.
  */
-static void gmap_free(struct gmap *gmap)
+void gmap_free(struct gmap *gmap)
 {
-	struct page *page, *next;
-
 	/* Flush tlb of all gmaps (if not already done for shadows) */
 	if (!(gmap_is_shadow(gmap) && gmap->removed))
 		gmap_flush_tlb(gmap);
 	/* Free all segment & region tables. */
-	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, CRST_ALLOC_ORDER);
+	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 
 	/* Free additional data for a shadow gmap */
 	if (gmap_is_shadow(gmap)) {
-		/* Free all page tables. */
-		list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
-			page_table_free_pgste(page);
 		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
 		/* Release reference to the parent */
 		gmap_put(gmap->parent);
@@ -204,6 +241,7 @@ static void gmap_free(struct gmap *gmap)
 
 	kfree(gmap);
 }
+EXPORT_SYMBOL_GPL(gmap_free);
 
 /**
  * gmap_get - increase reference counter for guest address space
@@ -267,37 +305,6 @@ void gmap_remove(struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_remove);
 
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
-{
-	S390_lowcore.gmap = (unsigned long) gmap;
-}
-EXPORT_SYMBOL_GPL(gmap_enable);
-
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
-{
-	S390_lowcore.gmap = 0UL;
-}
-EXPORT_SYMBOL_GPL(gmap_disable);
-
-/**
- * gmap_get_enabled - get a pointer to the currently enabled gmap
- *
- * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
- */
-struct gmap *gmap_get_enabled(void)
-{
-	return (struct gmap *) S390_lowcore.gmap;
-}
-EXPORT_SYMBOL_GPL(gmap_get_enabled);
-
 /*
  * gmap_alloc_table is assumed to be called with mmap_lock held
  */
@@ -308,17 +315,15 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	page = gmap_alloc_crst();
 	if (!page)
 		return -ENOMEM;
-	new = (unsigned long *) page_to_phys(page);
+	new = page_to_virt(page);
 	crst_table_init(new, init);
 	spin_lock(&gmap->guest_table_lock);
 	if (*table & _REGION_ENTRY_INVALID) {
-		list_add(&page->lru, &gmap->crst_list);
-		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+		*table = __pa(new) | _REGION_ENTRY_LENGTH |
 			(*table & _REGION_ENTRY_TYPE_MASK);
-		page->index = gaddr;
 		page = NULL;
 	}
 	spin_unlock(&gmap->guest_table_lock);
@@ -327,22 +332,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	return 0;
 }
 
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
 {
-	struct page *page;
-	unsigned long offset, mask;
+	return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
 
-	offset = (unsigned long) entry / sizeof(unsigned long);
-	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
-	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
-	page = virt_to_page((void *)((unsigned long) entry & mask));
-	return page->index + offset;
+static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
+{
+	return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+}
+
+static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
+				       unsigned long *gaddr)
+{
+	*gaddr = host_to_guest_delete(gmap, vmaddr);
+	if (IS_GADDR_VALID(*gaddr))
+		return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
+	return NULL;
 }
 
 /**
@@ -354,16 +360,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry)
  */
 static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
 {
-	unsigned long *entry;
+	unsigned long gaddr;
 	int flush = 0;
+	pmd_t *pmdp;
 
 	BUG_ON(gmap_is_shadow(gmap));
 	spin_lock(&gmap->guest_table_lock);
-	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
-	if (entry) {
-		flush = (*entry != _SEGMENT_ENTRY_EMPTY);
-		*entry = _SEGMENT_ENTRY_EMPTY;
+
+	pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+	if (pmdp) {
+		flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
+		*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
 	}
+
 	spin_unlock(&gmap->guest_table_lock);
 	return flush;
 }
@@ -482,26 +491,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
 EXPORT_SYMBOL_GPL(__gmap_translate);
 
 /**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
-	unsigned long rc;
-
-	mmap_read_lock(gmap->mm);
-	rc = __gmap_translate(gmap, gaddr);
-	mmap_read_unlock(gmap->mm);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_translate);
-
-/**
  * gmap_unlink - disconnect a page table from the gmap shadow tables
  * @mm: pointer to the parent mm_struct
  * @table: pointer to the host page table
@@ -557,7 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
 				     gaddr & _REGION1_MASK))
 			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 	}
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
 		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -565,7 +554,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
 				     gaddr & _REGION2_MASK))
 			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 	}
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
 		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -573,7 +562,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
 				     gaddr & _REGION3_MASK))
 			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 	}
 	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 	/* Walk the parent mm page table */
@@ -585,12 +574,12 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	pud = pud_offset(p4d, vmaddr);
 	VM_BUG_ON(pud_none(*pud));
 	/* large puds cannot yet be handled */
-	if (pud_large(*pud))
+	if (pud_leaf(*pud))
 		return -EFAULT;
 	pmd = pmd_offset(pud, vmaddr);
 	VM_BUG_ON(pmd_none(*pmd));
 	/* Are we allowed to use huge pages? */
-	if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
+	if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
 		return -EFAULT;
 	/* Link gmap segment table entry location to page table. */
 	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
@@ -600,12 +589,14 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	spin_lock(&gmap->guest_table_lock);
 	if (*table == _SEGMENT_ENTRY_EMPTY) {
 		rc = radix_tree_insert(&gmap->host_to_guest,
-				       vmaddr >> PMD_SHIFT, table);
+				       vmaddr >> PMD_SHIFT,
+				       (void *)MAKE_VALID_GADDR(gaddr));
 		if (!rc) {
-			if (pmd_large(*pmd)) {
+			if (pmd_leaf(*pmd)) {
 				*table = (pmd_val(*pmd) &
 					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
-					| _SEGMENT_ENTRY_GMAP_UC;
+					| _SEGMENT_ENTRY_GMAP_UC
+					| _SEGMENT_ENTRY;
 			} else
 				*table = pmd_val(*pmd) &
 					_SEGMENT_ENTRY_HARDWARE_BITS;
@@ -622,113 +613,27 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 	radix_tree_preload_end();
 	return rc;
 }
-
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
-	       unsigned int fault_flags)
-{
-	unsigned long vmaddr;
-	int rc;
-	bool unlocked;
-
-	mmap_read_lock(gmap->mm);
-
-retry:
-	unlocked = false;
-	vmaddr = __gmap_translate(gmap, gaddr);
-	if (IS_ERR_VALUE(vmaddr)) {
-		rc = vmaddr;
-		goto out_up;
-	}
-	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
-			     &unlocked)) {
-		rc = -EFAULT;
-		goto out_up;
-	}
-	/*
-	 * In the case that fixup_user_fault unlocked the mmap_lock during
-	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
-	 */
-	if (unlocked)
-		goto retry;
-
-	rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
-	mmap_read_unlock(gmap->mm);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_fault);
+EXPORT_SYMBOL(__gmap_link);
 
 /*
  * this function is assumed to be called with mmap_lock held
  */
 void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
 {
-	struct vm_area_struct *vma;
 	unsigned long vmaddr;
-	spinlock_t *ptl;
-	pte_t *ptep;
+
+	mmap_assert_locked(gmap->mm);
 
 	/* Find the vm address for the guest address */
 	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
 						   gaddr >> PMD_SHIFT);
 	if (vmaddr) {
 		vmaddr |= gaddr & ~PMD_MASK;
-
-		vma = vma_lookup(gmap->mm, vmaddr);
-		if (!vma || is_vm_hugetlb_page(vma))
-			return;
-
-		/* Get pointer to the page table entry */
-		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-		if (likely(ptep)) {
-			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
-			pte_unmap_unlock(ptep, ptl);
-		}
+		gmap_helper_zap_one_page(gmap->mm, vmaddr);
 	}
 }
 EXPORT_SYMBOL_GPL(__gmap_zap);
 
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
-	unsigned long gaddr, vmaddr, size;
-	struct vm_area_struct *vma;
-
-	mmap_read_lock(gmap->mm);
-	for (gaddr = from; gaddr < to;
-	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
-		/* Find the vm address for the guest address */
-		vmaddr = (unsigned long)
-			radix_tree_lookup(&gmap->guest_to_host,
-					  gaddr >> PMD_SHIFT);
-		if (!vmaddr)
-			continue;
-		vmaddr |= gaddr & ~PMD_MASK;
-		/* Find vma in the parent mm */
-		vma = find_vma(gmap->mm, vmaddr);
-		if (!vma)
-			continue;
-		/*
-		 * We do not discard pages that are backed by
-		 * hugetlbfs, so we don't have to refault them.
-		 */
-		if (is_vm_hugetlb_page(vma))
-			continue;
-		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range(vma, vmaddr, size);
-	}
-	mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
 static LIST_HEAD(gmap_notifier_list);
 static DEFINE_SPINLOCK(gmap_notifier_lock);
 
@@ -790,8 +695,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
  *
  * Note: Can also be called for shadow gmaps.
  */
-static inline unsigned long *gmap_table_walk(struct gmap *gmap,
-					     unsigned long gaddr, int level)
+unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
 {
 	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
 	unsigned long *table = gmap->table;
@@ -813,7 +717,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -821,7 +725,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -829,7 +733,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
@@ -837,11 +741,12 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
-		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
+		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
+		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
 	}
 	return table;
 }
+EXPORT_SYMBOL(gmap_table_walk);
 
 /**
  * gmap_pte_op_walk - walk the gmap page table, get the page table lock
@@ -896,12 +801,12 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
 
 /**
  * gmap_pte_op_end - release the page table lock
- * @ptl: pointer to the spinlock pointer
+ * @ptep: pointer to the locked pte
+ * @ptl: pointer to the page table spinlock
  */
-static void gmap_pte_op_end(spinlock_t *ptl)
+static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
 {
-	if (ptl)
-		spin_unlock(ptl);
+	pte_unmap_unlock(ptep, ptl);
 }
 
 /**
@@ -932,7 +837,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
 	}
 
 	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
-	if (!pmd_large(*pmdp))
+	if (!pmd_leaf(*pmdp))
 		spin_unlock(&gmap->guest_table_lock);
 	return pmdp;
 }
@@ -944,7 +849,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
  */
 static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
 {
-	if (pmd_large(*pmdp))
+	if (pmd_leaf(*pmdp))
 		spin_unlock(&gmap->guest_table_lock);
 }
 
@@ -1012,7 +917,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
 {
 	int rc;
 	pte_t *ptep;
-	spinlock_t *ptl = NULL;
+	spinlock_t *ptl;
 	unsigned long pbits = 0;
 
 	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
@@ -1026,7 +931,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
 	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
 	/* Protect and unlock. */
 	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
-	gmap_pte_op_end(ptl);
+	gmap_pte_op_end(ptep, ptl);
 	return rc;
 }
 
@@ -1038,86 +943,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
  * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
  * @bits: pgste notification bits to set
  *
- * Returns 0 if successfully protected, -ENOMEM if out of memory and
- * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
+ * Returns:
+ *   PAGE_SIZE if a small page was successfully protected;
+ *   HPAGE_SIZE if a large page was successfully protected;
+ *   -ENOMEM if out of memory;
+ *   -EFAULT if gaddr is invalid (or mapping for shadows is missing);
+ *   -EAGAIN if the guest mapping is missing and should be fixed by the caller.
  *
- * Called with sg->mm->mmap_lock in read.
+ * Context: Called with gmap->mm->mmap_lock held in read mode.
  */
-static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
-			      unsigned long len, int prot, unsigned long bits)
+int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
 {
-	unsigned long vmaddr, dist;
 	pmd_t *pmdp;
-	int rc;
+	int rc = 0;
 
 	BUG_ON(gmap_is_shadow(gmap));
-	while (len) {
-		rc = -EAGAIN;
-		pmdp = gmap_pmd_op_walk(gmap, gaddr);
-		if (pmdp) {
-			if (!pmd_large(*pmdp)) {
-				rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
-						      bits);
-				if (!rc) {
-					len -= PAGE_SIZE;
-					gaddr += PAGE_SIZE;
-				}
-			} else {
-				rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
-						      bits);
-				if (!rc) {
-					dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
-					len = len < dist ? 0 : len - dist;
-					gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
-				}
-			}
-			gmap_pmd_op_end(gmap, pmdp);
-		}
-		if (rc) {
-			if (rc == -EINVAL)
-				return rc;
 
-			/* -EAGAIN, fixup of userspace mm and gmap */
-			vmaddr = __gmap_translate(gmap, gaddr);
-			if (IS_ERR_VALUE(vmaddr))
-				return vmaddr;
-			rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
-			if (rc)
-				return rc;
-		}
-	}
-	return 0;
-}
+	pmdp = gmap_pmd_op_walk(gmap, gaddr);
+	if (!pmdp)
+		return -EAGAIN;
 
-/**
- * gmap_mprotect_notify - change access rights for a range of ptes and
- *                        call the notifier if any pte changes again
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
- *
- * Returns 0 if for each page in the given range a gmap mapping exists,
- * the new access rights could be set and the notifier could be armed.
- * If the gmap mapping is missing for one or more pages -EFAULT is
- * returned. If no memory could be allocated -ENOMEM is returned.
- * This function establishes missing page table entries.
- */
-int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
-			 unsigned long len, int prot)
-{
-	int rc;
+	if (!pmd_leaf(*pmdp)) {
+		rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
+		if (!rc)
+			rc = PAGE_SIZE;
+	} else {
+		rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
+		if (!rc)
+			rc = HPAGE_SIZE;
+	}
+	gmap_pmd_op_end(gmap, pmdp);
 
-	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
-		return -EINVAL;
-	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
-		return -EINVAL;
-	mmap_read_lock(gmap->mm);
-	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
-	mmap_read_unlock(gmap->mm);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
+EXPORT_SYMBOL_GPL(gmap_protect_one);
 
 /**
  * gmap_read_table - get an unsigned long value from a guest page table using
@@ -1150,12 +1009,12 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
 			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
 				address = pte_val(pte) & PAGE_MASK;
 				address += gaddr & ~PAGE_MASK;
-				*val = *(unsigned long *) address;
+				*val = *(unsigned long *)__va(address);
 				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
 				/* Do *NOT* clear the _PAGE_INVALID bit! */
 				rc = 0;
 			}
-			gmap_pte_op_end(ptl);
+			gmap_pte_op_end(ptep, ptl);
 		}
 		if (!rc)
 			break;
@@ -1249,7 +1108,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
 			if (!rc)
 				gmap_insert_rmap(sg, vmaddr, rmap);
 			spin_unlock(&sg->guest_table_lock);
-			gmap_pte_op_end(ptl);
+			gmap_pte_op_end(ptep, ptl);
 		}
 		radix_tree_preload_end();
 		if (rc) {
@@ -1304,7 +1163,7 @@ static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
 	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
 	if (!table || *table & _PAGE_INVALID)
 		return;
-	gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
+	gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
 	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
 }
 
@@ -1322,7 +1181,7 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
-	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
+	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
 		pgt[i] = _PAGE_INVALID;
 }
 
@@ -1335,23 +1194,23 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long sto, *ste, *pgt;
-	struct page *page;
+	unsigned long *ste;
+	phys_addr_t sto, pgt;
+	struct ptdesc *ptdesc;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
 	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
-	sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
+	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
 	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
-	pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
+	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
 	*ste = _SEGMENT_ENTRY_EMPTY;
-	__gmap_unshadow_pgt(sg, raddr, pgt);
+	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
 	/* Free page table */
-	page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
-	list_del(&page->lru);
-	page_table_free_pgste(page);
+	ptdesc = page_ptdesc(phys_to_page(pgt));
+	page_table_free_pgste(ptdesc);
 }
 
 /**
@@ -1365,21 +1224,20 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
 				unsigned long *sgt)
 {
-	unsigned long *pgt;
-	struct page *page;
+	struct ptdesc *ptdesc;
+	phys_addr_t pgt;
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
 		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
 			continue;
-		pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
 		sgt[i] = _SEGMENT_ENTRY_EMPTY;
-		__gmap_unshadow_pgt(sg, raddr, pgt);
+		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
 		/* Free page table */
-		page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
-		list_del(&page->lru);
-		page_table_free_pgste(page);
+		ptdesc = page_ptdesc(phys_to_page(pgt));
+		page_table_free_pgste(ptdesc);
 	}
 }
 
@@ -1392,7 +1250,8 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long r3o, *r3e, *sgt;
+	unsigned long r3o, *r3e;
+	phys_addr_t sgt;
 	struct page *page;
 
 	BUG_ON(!gmap_is_shadow(sg));
@@ -1401,13 +1260,12 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
 	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
-	gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
-	sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
+	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
+	sgt = *r3e & _REGION_ENTRY_ORIGIN;
 	*r3e = _REGION3_ENTRY_EMPTY;
-	__gmap_unshadow_sgt(sg, raddr, sgt);
+	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
 	/* Free segment table */
-	page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
-	list_del(&page->lru);
+	page = phys_to_page(sgt);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
 
@@ -1422,20 +1280,19 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
 				unsigned long *r3t)
 {
-	unsigned long *sgt;
 	struct page *page;
+	phys_addr_t sgt;
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
 		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
-		sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
+		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
 		r3t[i] = _REGION3_ENTRY_EMPTY;
-		__gmap_unshadow_sgt(sg, raddr, sgt);
+		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
 		/* Free segment table */
-		page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
-		list_del(&page->lru);
+		page = phys_to_page(sgt);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
@@ -1449,7 +1306,8 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long r2o, *r2e, *r3t;
+	unsigned long r2o, *r2e;
+	phys_addr_t r3t;
 	struct page *page;
 
 	BUG_ON(!gmap_is_shadow(sg));
@@ -1458,13 +1316,12 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
 	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
-	gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
-	r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
+	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
+	r3t = *r2e & _REGION_ENTRY_ORIGIN;
 	*r2e = _REGION2_ENTRY_EMPTY;
-	__gmap_unshadow_r3t(sg, raddr, r3t);
+	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
 	/* Free region 3 table */
-	page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
-	list_del(&page->lru);
+	page = phys_to_page(r3t);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
 
@@ -1479,7 +1336,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 				unsigned long *r2t)
 {
-	unsigned long *r3t;
+	phys_addr_t r3t;
 	struct page *page;
 	int i;
 
@@ -1487,12 +1344,11 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
 		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
-		r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
+		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
 		r2t[i] = _REGION2_ENTRY_EMPTY;
-		__gmap_unshadow_r3t(sg, raddr, r3t);
+		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
 		/* Free region 3 table */
-		page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
-		list_del(&page->lru);
+		page = phys_to_page(r3t);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
@@ -1506,8 +1362,9 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long r1o, *r1e, *r2t;
+	unsigned long r1o, *r1e;
 	struct page *page;
+	phys_addr_t r2t;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
@@ -1515,13 +1372,12 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
 	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
-	gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
-	r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
+	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
+	r2t = *r1e & _REGION_ENTRY_ORIGIN;
 	*r1e = _REGION1_ENTRY_EMPTY;
-	__gmap_unshadow_r2t(sg, raddr, r2t);
+	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
 	/* Free region 2 table */
-	page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
-	list_del(&page->lru);
+	page = phys_to_page(r2t);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
 
@@ -1536,23 +1392,23 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
 				unsigned long *r1t)
 {
-	unsigned long asce, *r2t;
+	unsigned long asce;
 	struct page *page;
+	phys_addr_t r2t;
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
-	asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
+	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
 		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
-		r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
-		__gmap_unshadow_r2t(sg, raddr, r2t);
+		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
+		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
 		/* Clear entry and flush translation r1t -> r2t */
 		gmap_idte_one(asce, raddr);
 		r1t[i] = _REGION1_ENTRY_EMPTY;
 		/* Free region 2 table */
-		page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
-		list_del(&page->lru);
+		page = phys_to_page(r2t);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
 }
@@ -1563,7 +1419,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
  *
  * Called with sg->guest_table_lock
  */
-static void gmap_unshadow(struct gmap *sg)
+void gmap_unshadow(struct gmap *sg)
 {
 	unsigned long *table;
 
@@ -1573,7 +1429,7 @@ static void gmap_unshadow(struct gmap *sg)
 	sg->removed = 1;
 	gmap_call_notifier(sg, 0, -1UL);
 	gmap_flush_tlb(sg);
-	table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
+	table = __va(sg->asce & _ASCE_ORIGIN);
 	switch (sg->asce & _ASCE_TYPE_MASK) {
 	case _ASCE_TYPE_REGION1:
 		__gmap_unshadow_r1t(sg, 0, table);
@@ -1589,142 +1445,7 @@ static void gmap_unshadow(struct gmap *sg)
 		break;
 	}
 }
-
-/**
- * gmap_find_shadow - find a specific asce in the list of shadow tables
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns the pointer to a gmap if a shadow table with the given asce is
- * already available, ERR_PTR(-EAGAIN) if another one is just being created,
- * otherwise NULL
- */
-static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
-				     int edat_level)
-{
-	struct gmap *sg;
-
-	list_for_each_entry(sg, &parent->children, list) {
-		if (sg->orig_asce != asce || sg->edat_level != edat_level ||
-		    sg->removed)
-			continue;
-		if (!sg->initialized)
-			return ERR_PTR(-EAGAIN);
-		refcount_inc(&sg->ref_count);
-		return sg;
-	}
-	return NULL;
-}
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- *                     given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
-	if (sg->removed)
-		return 0;
-	return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow_valid);
-
-/**
- * gmap_shadow - create/find a shadow guest address space
- * @parent: pointer to the parent gmap
- * @asce: ASCE for which the shadow table is created
- * @edat_level: edat level to be used for the shadow translation
- *
- * The pages of the top level page table referred by the asce parameter
- * will be set to read-only and marked in the PGSTEs of the kvm process.
- * The shadow table will be removed automatically on any change to the
- * PTE mapping for the source table.
- *
- * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
- * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
- * parent gmap table could not be protected.
- */
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
-			 int edat_level)
-{
-	struct gmap *sg, *new;
-	unsigned long limit;
-	int rc;
-
-	BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
-	BUG_ON(gmap_is_shadow(parent));
-	spin_lock(&parent->shadow_lock);
-	sg = gmap_find_shadow(parent, asce, edat_level);
-	spin_unlock(&parent->shadow_lock);
-	if (sg)
-		return sg;
-	/* Create a new shadow gmap */
-	limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
-	if (asce & _ASCE_REAL_SPACE)
-		limit = -1UL;
-	new = gmap_alloc(limit);
-	if (!new)
-		return ERR_PTR(-ENOMEM);
-	new->mm = parent->mm;
-	new->parent = gmap_get(parent);
-	new->orig_asce = asce;
-	new->edat_level = edat_level;
-	new->initialized = false;
-	spin_lock(&parent->shadow_lock);
-	/* Recheck if another CPU created the same shadow */
-	sg = gmap_find_shadow(parent, asce, edat_level);
-	if (sg) {
-		spin_unlock(&parent->shadow_lock);
-		gmap_free(new);
-		return sg;
-	}
-	if (asce & _ASCE_REAL_SPACE) {
-		/* only allow one real-space gmap shadow */
-		list_for_each_entry(sg, &parent->children, list) {
-			if (sg->orig_asce & _ASCE_REAL_SPACE) {
-				spin_lock(&sg->guest_table_lock);
-				gmap_unshadow(sg);
-				spin_unlock(&sg->guest_table_lock);
-				list_del(&sg->list);
-				gmap_put(sg);
-				break;
-			}
-		}
-	}
-	refcount_set(&new->ref_count, 2);
-	list_add(&new->list, &parent->children);
-	if (asce & _ASCE_REAL_SPACE) {
-		/* nothing to protect, return right away */
-		new->initialized = true;
-		spin_unlock(&parent->shadow_lock);
-		return new;
-	}
-	spin_unlock(&parent->shadow_lock);
-	/* protect after insertion, so it will get properly invalidated */
-	mmap_read_lock(parent->mm);
-	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
-				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
-				PROT_READ, GMAP_NOTIFY_SHADOW);
-	mmap_read_unlock(parent->mm);
-	spin_lock(&parent->shadow_lock);
-	new->initialized = true;
-	if (rc) {
-		list_del(&new->list);
-		gmap_free(new);
-		new = ERR_PTR(rc);
-	}
-	spin_unlock(&parent->shadow_lock);
-	return new;
-}
-EXPORT_SYMBOL_GPL(gmap_shadow);
+EXPORT_SYMBOL(gmap_unshadow);
 
 /**
  * gmap_shadow_r2t - create an empty shadow region 2 table
@@ -1736,7 +1457,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow);
  * The r2t parameter specifies the address of the source table. The
  * four pages of the source table are made read-only in the parent gmap
  * address space. A write to the source table area @r2t will automatically
- * remove the shadow r2 table and all of its decendents.
+ * remove the shadow r2 table and all of its descendants.
  *
  * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
  * shadow table structure is incomplete, -ENOMEM if out of memory and
@@ -1748,19 +1469,17 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		    int fake)
 {
 	unsigned long raddr, origin, offset, len;
-	unsigned long *s_r2t, *table;
+	unsigned long *table;
+	phys_addr_t s_r2t;
 	struct page *page;
 	int rc;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	/* Allocate a shadow region second table */
-	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	page = gmap_alloc_crst();
 	if (!page)
 		return -ENOMEM;
-	page->index = r2t & _REGION_ENTRY_ORIGIN;
-	if (fake)
-		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_r2t = (unsigned long *) page_to_phys(page);
+	s_r2t = page_to_phys(page);
 	/* Install shadow region second table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
@@ -1775,13 +1494,12 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		rc = -EAGAIN;		/* Race with shadow */
 		goto out_free;
 	}
-	crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
+	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
 	/* mark as invalid as long as the parent table is not protected */
-	*table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
+	*table = s_r2t | _REGION_ENTRY_LENGTH |
 		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= (r2t & _REGION_ENTRY_PROTECT);
-	list_add(&page->lru, &sg->crst_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_REGION_ENTRY_INVALID;
@@ -1798,8 +1516,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 4);
-		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
-			      (unsigned long) s_r2t)
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_REGION_ENTRY_INVALID;
@@ -1832,19 +1549,17 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 		    int fake)
 {
 	unsigned long raddr, origin, offset, len;
-	unsigned long *s_r3t, *table;
+	unsigned long *table;
+	phys_addr_t s_r3t;
 	struct page *page;
 	int rc;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	/* Allocate a shadow region second table */
-	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	page = gmap_alloc_crst();
 	if (!page)
 		return -ENOMEM;
-	page->index = r3t & _REGION_ENTRY_ORIGIN;
-	if (fake)
-		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_r3t = (unsigned long *) page_to_phys(page);
+	s_r3t = page_to_phys(page);
 	/* Install shadow region second table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
@@ -1859,13 +1574,12 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 		rc = -EAGAIN;		/* Race with shadow */
 		goto out_free;
 	}
-	crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
+	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
 	/* mark as invalid as long as the parent table is not protected */
-	*table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
+	*table = s_r3t | _REGION_ENTRY_LENGTH |
 		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= (r3t & _REGION_ENTRY_PROTECT);
-	list_add(&page->lru, &sg->crst_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_REGION_ENTRY_INVALID;
@@ -1882,8 +1596,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 3);
-		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
-			      (unsigned long) s_r3t)
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_REGION_ENTRY_INVALID;
@@ -1916,19 +1629,17 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		    int fake)
 {
 	unsigned long raddr, origin, offset, len;
-	unsigned long *s_sgt, *table;
+	unsigned long *table;
+	phys_addr_t s_sgt;
 	struct page *page;
 	int rc;
 
 	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
 	/* Allocate a shadow segment table */
-	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	page = gmap_alloc_crst();
 	if (!page)
 		return -ENOMEM;
-	page->index = sgt & _REGION_ENTRY_ORIGIN;
-	if (fake)
-		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_sgt = (unsigned long *) page_to_phys(page);
+	s_sgt = page_to_phys(page);
 	/* Install shadow region second table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
@@ -1943,13 +1654,12 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		rc = -EAGAIN;		/* Race with shadow */
 		goto out_free;
 	}
-	crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
+	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
 	/* mark as invalid as long as the parent table is not protected */
-	*table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
+	*table = s_sgt | _REGION_ENTRY_LENGTH |
 		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= sgt & _REGION_ENTRY_PROTECT;
-	list_add(&page->lru, &sg->crst_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_REGION_ENTRY_INVALID;
@@ -1966,8 +1676,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 2);
-		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
-			      (unsigned long) s_sgt)
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_REGION_ENTRY_INVALID;
@@ -1983,45 +1692,22 @@ out_free:
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
 
-/**
- * gmap_shadow_pgt_lookup - find a shadow page table
- * @sg: pointer to the shadow guest address space structure
- * @saddr: the address in the shadow aguest address space
- * @pgt: parent gmap address of the page table to get shadowed
- * @dat_protection: if the pgtable is marked as protected by dat
- * @fake: pgt references contiguous guest memory block, not a pgtable
- *
- * Returns 0 if the shadow page table was found and -EAGAIN if the page
- * table was not found.
- *
- * Called with sg->mm->mmap_lock in read.
- */
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
-			   unsigned long *pgt, int *dat_protection,
-			   int *fake)
+static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
 {
-	unsigned long *table;
-	struct page *page;
-	int rc;
+	unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));
 
-	BUG_ON(!gmap_is_shadow(sg));
-	spin_lock(&sg->guest_table_lock);
-	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
-	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
-		/* Shadow page tables are full pages (pte+pgste) */
-		page = pfn_to_page(*table >> PAGE_SHIFT);
-		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
-		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
-		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
-		rc = 0;
-	} else  {
-		rc = -EAGAIN;
-	}
-	spin_unlock(&sg->guest_table_lock);
-	return rc;
+	pgstes += _PAGE_ENTRIES;
+
+	pgstes[0] &= ~PGSTE_ST2_MASK;
+	pgstes[1] &= ~PGSTE_ST2_MASK;
+	pgstes[2] &= ~PGSTE_ST2_MASK;
+	pgstes[3] &= ~PGSTE_ST2_MASK;
 
+	pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
+	pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
+	pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
+	pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
 }
-EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
 
 /**
  * gmap_shadow_pgt - instantiate a shadow page table
@@ -2040,19 +1726,21 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 		    int fake)
 {
 	unsigned long raddr, origin;
-	unsigned long *s_pgt, *table;
-	struct page *page;
+	unsigned long *table;
+	struct ptdesc *ptdesc;
+	phys_addr_t s_pgt;
 	int rc;
 
 	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
 	/* Allocate a shadow page table */
-	page = page_table_alloc_pgste(sg->mm);
-	if (!page)
+	ptdesc = page_table_alloc_pgste(sg->mm);
+	if (!ptdesc)
 		return -ENOMEM;
-	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
+	origin = pgt & _SEGMENT_ENTRY_ORIGIN;
 	if (fake)
-		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_pgt = (unsigned long *) page_to_phys(page);
+		origin |= GMAP_SHADOW_FAKE_TABLE;
+	gmap_pgste_set_pgt_addr(ptdesc, origin);
+	s_pgt = page_to_phys(ptdesc_page(ptdesc));
 	/* Install shadow page table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
@@ -2070,7 +1758,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	/* mark as invalid as long as the parent table is not protected */
 	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
 		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
-	list_add(&page->lru, &sg->pt_list);
 	if (fake) {
 		/* nothing to protect for fake tables */
 		*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2085,8 +1772,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 1);
-		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
-			      (unsigned long) s_pgt)
+		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2097,7 +1783,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	return rc;
 out_free:
 	spin_unlock(&sg->guest_table_lock);
-	page_table_free_pgste(page);
+	page_table_free_pgste(ptdesc);
 	return rc;
 
 }
@@ -2152,7 +1838,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
 			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
 			if (!tptep) {
 				spin_unlock(&sg->guest_table_lock);
-				gmap_pte_op_end(ptl);
+				gmap_pte_op_end(sptep, ptl);
 				radix_tree_preload_end();
 				break;
 			}
@@ -2163,7 +1849,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
 				rmap = NULL;
 				rc = 0;
 			}
-			gmap_pte_op_end(ptl);
+			gmap_pte_op_end(sptep, ptl);
 			spin_unlock(&sg->guest_table_lock);
 		}
 		radix_tree_preload_end();
@@ -2249,7 +1935,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 		 pte_t *pte, unsigned long bits)
 {
 	unsigned long offset, gaddr = 0;
-	unsigned long *table;
 	struct gmap *gmap, *sg, *next;
 
 	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
@@ -2257,12 +1942,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
 	rcu_read_lock();
 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
 		spin_lock(&gmap->guest_table_lock);
-		table = radix_tree_lookup(&gmap->host_to_guest,
-					  vmaddr >> PMD_SHIFT);
-		if (table)
-			gaddr = __gmap_segment_gaddr(table) + offset;
+		gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
 		spin_unlock(&gmap->guest_table_lock);
-		if (!table)
+		if (!IS_GADDR_VALID(gaddr))
 			continue;
 
 		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
@@ -2302,10 +1984,10 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
 	gaddr &= HPAGE_MASK;
 	pmdp_notify_gmap(gmap, pmdp, gaddr);
 	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
-	if (MACHINE_HAS_TLB_GUEST)
+	if (machine_has_tlb_guest())
 		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
 			    IDTE_GLOBAL);
-	else if (MACHINE_HAS_IDTE)
+	else if (cpu_has_idte())
 		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
 	else
 		__pmdp_csp(pmdp);
@@ -2322,13 +2004,12 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
 	rcu_read_lock();
 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
 		spin_lock(&gmap->guest_table_lock);
-		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
-						  vmaddr >> PMD_SHIFT);
+		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
 		if (pmdp) {
-			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
 			pmdp_notify_gmap(gmap, pmdp, gaddr);
 			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
-						   _SEGMENT_ENTRY_GMAP_UC));
+						   _SEGMENT_ENTRY_GMAP_UC |
+						   _SEGMENT_ENTRY));
 			if (purge)
 				__pmdp_csp(pmdp);
 			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
@@ -2368,27 +2049,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
  */
 void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
 {
-	unsigned long *entry, gaddr;
+	unsigned long gaddr;
 	struct gmap *gmap;
 	pmd_t *pmdp;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
 		spin_lock(&gmap->guest_table_lock);
-		entry = radix_tree_delete(&gmap->host_to_guest,
-					  vmaddr >> PMD_SHIFT);
-		if (entry) {
-			pmdp = (pmd_t *)entry;
-			gaddr = __gmap_segment_gaddr(entry);
+		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+		if (pmdp) {
 			pmdp_notify_gmap(gmap, pmdp, gaddr);
-			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
-					   _SEGMENT_ENTRY_GMAP_UC));
-			if (MACHINE_HAS_TLB_GUEST)
+			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+						   _SEGMENT_ENTRY_GMAP_UC |
+						   _SEGMENT_ENTRY));
+			if (machine_has_tlb_guest())
 				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
 					    gmap->asce, IDTE_LOCAL);
-			else if (MACHINE_HAS_IDTE)
+			else if (cpu_has_idte())
 				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
-			*entry = _SEGMENT_ENTRY_EMPTY;
+			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
 		}
 		spin_unlock(&gmap->guest_table_lock);
 	}
@@ -2403,29 +2082,27 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
  */
 void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
 {
-	unsigned long *entry, gaddr;
+	unsigned long gaddr;
 	struct gmap *gmap;
 	pmd_t *pmdp;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
 		spin_lock(&gmap->guest_table_lock);
-		entry = radix_tree_delete(&gmap->host_to_guest,
-					  vmaddr >> PMD_SHIFT);
-		if (entry) {
-			pmdp = (pmd_t *)entry;
-			gaddr = __gmap_segment_gaddr(entry);
+		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
+		if (pmdp) {
 			pmdp_notify_gmap(gmap, pmdp, gaddr);
-			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
-					   _SEGMENT_ENTRY_GMAP_UC));
-			if (MACHINE_HAS_TLB_GUEST)
+			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+						   _SEGMENT_ENTRY_GMAP_UC |
+						   _SEGMENT_ENTRY));
+			if (machine_has_tlb_guest())
 				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
 					    gmap->asce, IDTE_GLOBAL);
-			else if (MACHINE_HAS_IDTE)
+			else if (cpu_has_idte())
 				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
 			else
 				__pmdp_csp(pmdp);
-			*entry = _SEGMENT_ENTRY_EMPTY;
+			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
 		}
 		spin_unlock(&gmap->guest_table_lock);
 	}
@@ -2481,7 +2158,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
 	if (!pmdp)
 		return;
 
-	if (pmd_large(*pmdp)) {
+	if (pmd_leaf(*pmdp)) {
 		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
 			bitmap_fill(bitmap, _PAGE_ENTRIES);
 	} else {
@@ -2491,7 +2168,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
 				continue;
 			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
 				set_bit(i, bitmap);
-			spin_unlock(ptl);
+			pte_unmap_unlock(ptep, ptl);
 		}
 	}
 	gmap_pmd_op_end(gmap, pmdp);
@@ -2510,15 +2187,16 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops thp_split_walk_ops = {
 	.pmd_entry	= thp_split_walk_pmd_entry,
+	.walk_lock	= PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
-	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
-		vma->vm_flags &= ~VM_HUGEPAGE;
-		vma->vm_flags |= VM_NOHUGEPAGE;
+	for_each_vma(vmi, vma) {
+		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
 		walk_page_vma(vma, &thp_split_walk_ops, NULL);
 	}
 	mm->def_flags |= VM_NOHUGEPAGE;
@@ -2530,33 +2208,6 @@ static inline void thp_split_mm(struct mm_struct *mm)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * Remove all empty zero pages from the mapping for lazy refaulting
- * - This must be called after mm->context.has_pgste is set, to avoid
- *   future creation of zero pages
- * - This must be called after THP was enabled
- */
-static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
-			   unsigned long end, struct mm_walk *walk)
-{
-	unsigned long addr;
-
-	for (addr = start; addr != end; addr += PAGE_SIZE) {
-		pte_t *ptep;
-		spinlock_t *ptl;
-
-		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-		if (is_zero_pfn(pte_pfn(*ptep)))
-			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
-		pte_unmap_unlock(ptep, ptl);
-	}
-	return 0;
-}
-
-static const struct mm_walk_ops zap_zero_walk_ops = {
-	.pmd_entry	= __zap_zero_pages,
-};
-
-/*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
@@ -2566,36 +2217,15 @@ int s390_enable_sie(void)
 	/* Do we have pgstes? if yes, we are done */
 	if (mm_has_pgste(mm))
 		return 0;
-	/* Fail if the page tables are 2K */
-	if (!mm_alloc_pgste(mm))
-		return -EINVAL;
 	mmap_write_lock(mm);
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
-	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
 	mmap_write_unlock(mm);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
-int gmap_mark_unmergeable(void)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	int ret;
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
-				  MADV_UNMERGEABLE, &vma->vm_flags);
-		if (ret)
-			return ret;
-	}
-	mm->def_flags &= ~VM_MERGEABLE;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
-
 /*
  * Enable storage key handling from now on and initialize the storage
  * keys with the default key.
@@ -2626,7 +2256,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
 {
 	pmd_t *pmd = (pmd_t *)pte;
 	unsigned long start, end;
-	struct page *page = pmd_page(*pmd);
+	struct folio *folio = page_folio(pmd_page(*pmd));
 
 	/*
 	 * The write check makes sure we do not set a key on shared
@@ -2639,9 +2269,9 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
 		return 0;
 
 	start = pmd_val(*pmd) & HPAGE_MASK;
-	end = start + HPAGE_SIZE - 1;
+	end = start + HPAGE_SIZE;
 	__storage_key_init_range(start, end);
-	set_bit(PG_arch_1, &page->flags);
+	set_bit(PG_arch_1, &folio->flags);
 	cond_resched();
 	return 0;
 }
@@ -2650,6 +2280,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
 	.hugetlb_entry		= __s390_enable_skey_hugetlb,
 	.pte_entry		= __s390_enable_skey_pte,
 	.pmd_entry		= __s390_enable_skey_pmd,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2662,7 +2293,7 @@ int s390_enable_skey(void)
 		goto out_up;
 
 	mm->context.uses_skeys = 1;
-	rc = gmap_mark_unmergeable();
+	rc = gmap_helper_disable_cow_sharing();
 	if (rc) {
 		mm->context.uses_skeys = 0;
 		goto out_up;
@@ -2687,6 +2318,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 
 static const struct mm_walk_ops reset_cmma_walk_ops = {
 	.pte_entry		= __s390_reset_cmma,
+	.walk_lock		= PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2697,41 +2329,124 @@ void s390_reset_cmma(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);
 
+#define GATHER_GET_PAGES 32
+
+struct reset_walk_state {
+	unsigned long next;
+	unsigned long count;
+	unsigned long pfns[GATHER_GET_PAGES];
+};
+
+static int s390_gather_pages(pte_t *ptep, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
+{
+	struct reset_walk_state *p = walk->private;
+	pte_t pte = READ_ONCE(*ptep);
+
+	if (pte_present(pte)) {
+		/* we have a reference from the mapping, take an extra one */
+		get_page(phys_to_page(pte_val(pte)));
+		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
+		p->next = next;
+		p->count++;
+	}
+	return p->count >= GATHER_GET_PAGES;
+}
+
+static const struct mm_walk_ops gather_pages_ops = {
+	.pte_entry = s390_gather_pages,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
 /*
- * make inaccessible pages accessible again
+ * Call the Destroy secure page UVC on each page in the given array of PFNs.
+ * Each page needs to have an extra reference, which will be released here.
  */
-static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
 {
-	pte_t pte = READ_ONCE(*ptep);
+	struct folio *folio;
+	unsigned long i;
 
-	/* There is a reference through the mapping */
-	if (pte_present(pte))
-		WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
+	for (i = 0; i < count; i++) {
+		folio = pfn_folio(pfns[i]);
+		/* we always have an extra reference */
+		uv_destroy_folio(folio);
+		/* get rid of the extra reference */
+		folio_put(folio);
+		cond_resched();
+	}
+}
+EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
 
+/**
+ * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
+ * in the given range of the given address space.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ * @interruptible: if not 0, stop when a fatal signal is received
+ *
+ * Walk the given range of the given address space and call the destroy
+ * secure page UVC on each page. Optionally exit early if a fatal signal is
+ * pending.
+ *
+ * Return: 0 on success, -EINTR if the function stopped before completing
+ */
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool interruptible)
+{
+	struct reset_walk_state state = { .next = start };
+	int r = 1;
+
+	while (r > 0) {
+		state.count = 0;
+		mmap_read_lock(mm);
+		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
+		mmap_read_unlock(mm);
+		cond_resched();
+		s390_uv_destroy_pfns(state.count, state.pfns);
+		if (interruptible && fatal_signal_pending(current))
+			return -EINTR;
+	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
 
-static const struct mm_walk_ops reset_acc_walk_ops = {
-	.pte_entry		= __s390_reset_acc,
-};
-
-#include <linux/sched/mm.h>
-void s390_reset_acc(struct mm_struct *mm)
+/**
+ * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
+ * @gmap: the gmap whose ASCE needs to be replaced
+ *
+ * If the ASCE is a SEGMENT type then this function will return -EINVAL,
+ * otherwise the pointers in the host_to_guest radix tree will keep pointing
+ * to the wrong pages, causing use-after-free and memory corruption.
+ * If the allocation of the new top level page table fails, the ASCE is not
+ * replaced.
+ * In any case, the old ASCE is always removed from the gmap CRST list.
+ * Therefore the caller has to make sure to save a pointer to it
+ * beforehand, unless a leak is actually intended.
+ */
+int s390_replace_asce(struct gmap *gmap)
 {
-	if (!mm_is_protected(mm))
-		return;
-	/*
-	 * we might be called during
-	 * reset:                             we walk the pages and clear
-	 * close of all kvm file descriptors: we walk the pages and clear
-	 * exit of process on fd closure:     vma already gone, do nothing
-	 */
-	if (!mmget_not_zero(mm))
-		return;
-	mmap_read_lock(mm);
-	walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
-	mmap_read_unlock(mm);
-	mmput(mm);
+	unsigned long asce;
+	struct page *page;
+	void *table;
+
+	/* Replacing segment type ASCEs would cause serious issues */
+	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
+		return -EINVAL;
+
+	page = gmap_alloc_crst();
+	if (!page)
+		return -ENOMEM;
+	table = page_to_virt(page);
+	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
+
+	/* Set new table origin while preserving existing ASCE control bits */
+	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
+	WRITE_ONCE(gmap->asce, asce);
+	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
+	WRITE_ONCE(gmap->table, table);
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(s390_reset_acc);
+EXPORT_SYMBOL_GPL(s390_replace_asce);
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..a45d417ad951
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ */
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+/**
+ * ptep_zap_swap_entry() - discard a swap entry.
+ * @mm: the mm
+ * @entry: the swap entry that needs to be zapped
+ *
+ * Discards the given swap entry. If the swap entry was an actual swap
+ * entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+	if (!non_swap_entry(entry))
+		dec_mm_counter(mm, MM_SWAPENTS);
+	else if (is_migration_entry(entry))
+		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+	free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+	struct vm_area_struct *vma;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	mmap_assert_locked(mm);
+
+	/* Find the VMA that contains the userspace address */
+	vma = vma_lookup(mm, vmaddr);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return;
+
+	/* Get pointer to the page table entry */
+	ptep = get_locked_pte(mm, vmaddr, &ptl);
+	if (unlikely(!ptep))
+		return;
+	if (pte_swap(*ptep))
+		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+	struct vm_area_struct *vma;
+
+	mmap_assert_locked(mm);
+
+	while (vmaddr < end) {
+		vma = find_vma_intersection(mm, vmaddr, end);
+		if (!vma)
+			return;
+		if (!is_vm_hugetlb_page(vma))
+			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+		vmaddr = vma->vm_end;
+	}
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+				   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long *found_addr = walk->private;
+
+	/* Return 1 if the page is a zeropage. */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		/*
+		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
+		 * currently only works in COW mappings, which is also where
+		 * mm_forbids_zeropage() is checked.
+		 */
+		if (!is_cow_mapping(walk->vma->vm_flags))
+			return -EFAULT;
+
+		*found_addr = addr;
+		return 1;
+	}
+	return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+	.pte_entry      = find_zeropage_pte_entry,
+	.walk_lock      = PGWALK_WRLOCK,
+};
+
+/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+	unsigned long addr;
+	vm_fault_t fault;
+	int rc;
+
+	for_each_vma(vmi, vma) {
+		/*
+		 * We could only look at COW mappings, but it's more future
+		 * proof to catch unexpected zeropages in other mappings and
+		 * fail.
+		 */
+		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+			continue;
+		addr = vma->vm_start;
+
+retry:
+		rc = walk_page_range_vma(vma, addr, vma->vm_end,
+					 &find_zeropage_ops, &addr);
+		if (rc < 0)
+			return rc;
+		else if (!rc)
+			continue;
+
+		/* addr was updated by find_zeropage_pte_entry() */
+		fault = handle_mm_fault(vma, addr,
+					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+					NULL);
+		if (fault & VM_FAULT_OOM)
+			return -ENOMEM;
+		/*
+		 * See break_ksm(): even after handle_mm_fault() returned 0, we
+		 * must start the lookup from the current address, because
+		 * handle_mm_fault() may back out if there's any difficulty.
+		 *
+		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+		 * maybe they could trigger in the future on concurrent
+		 * truncation. In that case, the shared zeropage would be gone
+		 * and we can simply retry and make progress.
+		 */
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
+ *
+ * Note that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+	struct mm_struct *mm = current->mm;
+	int rc;
+
+	mmap_assert_write_locked(mm);
+
+	if (!mm->context.allow_cow_sharing)
+		return 0;
+
+	mm->context.allow_cow_sharing = 0;
+
+	/* Replace all shared zeropages by anonymous pages. */
+	rc = __gmap_helper_unshare_zeropages(mm);
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	if (!rc)
+		rc = ksm_disable(mm);
+	if (rc)
+		mm->context.allow_cow_sharing = 1;
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 10e51ef9c79a..e88c02c9e642 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -9,12 +9,13 @@
 #define KMSG_COMPONENT "hugetlb"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <asm/pgalloc.h>
+#include <linux/cpufeature.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <linux/sched/mm.h>
 #include <linux/security.h>
+#include <asm/pgalloc.h>
 
 /*
  * If the bit selected by single-bit bitmask "a" is set within "x", move
@@ -24,6 +25,7 @@
 
 static inline unsigned long __pte_to_rste(pte_t pte)
 {
+	swp_entry_t arch_entry;
 	unsigned long rste;
 
 	/*
@@ -48,6 +50,7 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 	 */
 	if (pte_present(pte)) {
 		rste = pte_val(pte) & PAGE_MASK;
+		rste |= _SEGMENT_ENTRY_PRESENT;
 		rste |= move_set_bit(pte_val(pte), _PAGE_READ,
 				     _SEGMENT_ENTRY_READ);
 		rste |= move_set_bit(pte_val(pte), _PAGE_WRITE,
@@ -66,6 +69,10 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 #endif
 		rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC,
 				     _SEGMENT_ENTRY_NOEXEC);
+	} else if (!pte_none(pte)) {
+		/* swap pte */
+		arch_entry = __pte_to_swp_entry(pte);
+		rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry));
 	} else
 		rste = _SEGMENT_ENTRY_EMPTY;
 	return rste;
@@ -73,13 +80,18 @@ static inline unsigned long __pte_to_rste(pte_t pte)
 
 static inline pte_t __rste_to_pte(unsigned long rste)
 {
+	swp_entry_t arch_entry;
 	unsigned long pteval;
-	int present;
+	int present, none;
+	pte_t pte;
 
-	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
 		present = pud_present(__pud(rste));
-	else
+		none = pud_none(__pud(rste));
+	} else {
 		present = pmd_present(__pmd(rste));
+		none = pmd_none(__pmd(rste));
+	}
 
 	/*
 	 * Convert encoding		pmd / pud bits	    pte bits
@@ -114,6 +126,11 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
 #endif
 		pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
+	} else if (!none) {
+		/* swap rste */
+		arch_entry = __rste_to_swp_entry(rste);
+		pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry));
+		pteval = pte_val(pte);
 	} else
 		pteval = _PAGE_INVALID;
 	return __pte(pteval);
@@ -121,7 +138,7 @@ static inline pte_t __rste_to_pte(unsigned long rste)
 
 static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
 {
-	struct page *page;
+	struct folio *folio;
 	unsigned long size, paddr;
 
 	if (!mm_uses_skeys(mm) ||
@@ -129,27 +146,25 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
 		return;
 
 	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
-		page = pud_page(__pud(rste));
+		folio = page_folio(pud_page(__pud(rste)));
 		size = PUD_SIZE;
 		paddr = rste & PUD_MASK;
 	} else {
-		page = pmd_page(__pmd(rste));
+		folio = page_folio(pmd_page(__pmd(rste)));
 		size = PMD_SIZE;
 		paddr = rste & PMD_MASK;
 	}
 
-	if (!test_and_set_bit(PG_arch_1, &page->flags))
-		__storage_key_init_range(paddr, paddr + size - 1);
+	if (!test_and_set_bit(PG_arch_1, &folio->flags))
+		__storage_key_init_range(paddr, paddr + size);
 }
 
-void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte)
 {
 	unsigned long rste;
 
 	rste = __pte_to_rste(pte);
-	if (!MACHINE_HAS_NX)
-		rste &= ~_SEGMENT_ENTRY_NOEXEC;
 
 	/* Set correct table type for 2G hugepages */
 	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
@@ -163,15 +178,21 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	set_pte(ptep, __pte(rste));
 }
 
-pte_t huge_ptep_get(pte_t *ptep)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte, unsigned long sz)
+{
+	__set_huge_pte_at(mm, addr, ptep, pte);
+}
+
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	return __rste_to_pte(pte_val(*ptep));
 }
 
-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-			      unsigned long addr, pte_t *ptep)
+pte_t __huge_ptep_get_and_clear(struct mm_struct *mm,
+				unsigned long addr, pte_t *ptep)
 {
-	pte_t pte = huge_ptep_get(ptep);
+	pte_t pte = huge_ptep_get(mm, addr, ptep);
 	pmd_t *pmdp = (pmd_t *) ptep;
 	pud_t *pudp = (pud_t *) ptep;
 
@@ -217,130 +238,21 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 		p4dp = p4d_offset(pgdp, addr);
 		if (p4d_present(*p4dp)) {
 			pudp = pud_offset(p4dp, addr);
-			if (pud_present(*pudp)) {
-				if (pud_large(*pudp))
-					return (pte_t *) pudp;
+			if (sz == PUD_SIZE)
+				return (pte_t *)pudp;
+			if (pud_present(*pudp))
 				pmdp = pmd_offset(pudp, addr);
-			}
 		}
 	}
 	return (pte_t *) pmdp;
 }
 
-int pmd_huge(pmd_t pmd)
-{
-	return pmd_large(pmd);
-}
-
-int pud_huge(pud_t pud)
-{
-	return pud_large(pud);
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int flags)
-{
-	if (flags & FOLL_GET)
-		return NULL;
-
-	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
-	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+	if (cpu_has_edat1() && size == PMD_SIZE)
 		return true;
-	else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+	else if (cpu_has_edat2() && size == PUD_SIZE)
 		return true;
 	else
 		return false;
 }
-
-static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
-		unsigned long addr, unsigned long len,
-		unsigned long pgoff, unsigned long flags)
-{
-	struct hstate *h = hstate_file(file);
-	struct vm_unmapped_area_info info;
-
-	info.flags = 0;
-	info.length = len;
-	info.low_limit = current->mm->mmap_base;
-	info.high_limit = TASK_SIZE;
-	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-	info.align_offset = 0;
-	return vm_unmapped_area(&info);
-}
-
-static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
-		unsigned long addr0, unsigned long len,
-		unsigned long pgoff, unsigned long flags)
-{
-	struct hstate *h = hstate_file(file);
-	struct vm_unmapped_area_info info;
-	unsigned long addr;
-
-	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-	info.length = len;
-	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-	info.high_limit = current->mm->mmap_base;
-	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-	info.align_offset = 0;
-	addr = vm_unmapped_area(&info);
-
-	/*
-	 * A failed mmap() very likely causes application failure,
-	 * so fall back to the bottom-up function here. This scenario
-	 * can happen with large stack limits and large mmap()
-	 * allocations.
-	 */
-	if (addr & ~PAGE_MASK) {
-		VM_BUG_ON(addr != -ENOMEM);
-		info.flags = 0;
-		info.low_limit = TASK_UNMAPPED_BASE;
-		info.high_limit = TASK_SIZE;
-		addr = vm_unmapped_area(&info);
-	}
-
-	return addr;
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-		unsigned long len, unsigned long pgoff, unsigned long flags)
-{
-	struct hstate *h = hstate_file(file);
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-
-	if (len & ~huge_page_mask(h))
-		return -EINVAL;
-	if (len > TASK_SIZE - mmap_min_addr)
-		return -ENOMEM;
-
-	if (flags & MAP_FIXED) {
-		if (prepare_hugepage_range(file, addr, len))
-			return -EINVAL;
-		goto check_asce_limit;
-	}
-
-	if (addr) {
-		addr = ALIGN(addr, huge_page_size(h));
-		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-		    (!vma || addr + len <= vm_start_gap(vma)))
-			goto check_asce_limit;
-	}
-
-	if (mm->get_unmapped_area == arch_get_unmapped_area)
-		addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
-				pgoff, flags);
-	else
-		addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
-				pgoff, flags);
-	if (offset_in_page(addr))
-		return addr;
-
-check_asce_limit:
-	return check_asce_limit(mm, addr, len);
-}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6a0ac00d5a42..074bf4fb4ce2 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -8,6 +8,7 @@
  *    Copyright (C) 1995  Linus Torvalds
  */
 
+#include <linux/cpufeature.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -31,29 +32,38 @@
 #include <linux/cma.h>
 #include <linux/gfp.h>
 #include <linux/dma-direct.h>
-#include <linux/platform-feature.h>
+#include <linux/percpu.h>
 #include <asm/processor.h>
 #include <linux/uaccess.h>
 #include <asm/pgalloc.h>
+#include <asm/ctlreg.h>
 #include <asm/kfence.h>
-#include <asm/ptdump.h>
 #include <asm/dma.h>
-#include <asm/lowcore.h>
-#include <asm/tlb.h>
+#include <asm/abs_lowcore.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
-#include <asm/ctl_reg.h>
 #include <asm/sclp.h>
 #include <asm/set_memory.h>
 #include <asm/kasan.h>
 #include <asm/dma-mapping.h>
 #include <asm/uv.h>
+#include <linux/virtio_anchor.h>
 #include <linux/virtio_config.h>
+#include <linux/execmem.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
-static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
 
-unsigned long s390_invalid_asce;
+struct ctlreg __bootdata_preserved(s390_invalid_asce);
+
+unsigned long __bootdata_preserved(page_noexec_mask);
+EXPORT_SYMBOL(page_noexec_mask);
+
+unsigned long __bootdata_preserved(segment_noexec_mask);
+EXPORT_SYMBOL(segment_noexec_mask);
+
+unsigned long __bootdata_preserved(region_noexec_mask);
+EXPORT_SYMBOL(region_noexec_mask);
 
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
@@ -61,27 +71,17 @@ EXPORT_SYMBOL(zero_page_mask);
 
 static void __init setup_zero_pages(void)
 {
+	unsigned long total_pages = memblock_estimated_nr_free_pages();
 	unsigned int order;
-	struct page *page;
-	int i;
 
 	/* Latest machines require a mapping granularity of 512KB */
 	order = 7;
 
 	/* Limit number of empty zero pages for small memory sizes */
-	while (order > 2 && (totalram_pages() >> 10) < (1UL << order))
+	while (order > 2 && (total_pages >> 10) < (1UL << order))
 		order--;
 
-	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!empty_zero_page)
-		panic("Out of memory in setup_zero_pages");
-
-	page = virt_to_page((void *) empty_zero_page);
-	split_page(page, order);
-	for (i = 1 << order; i > 0; i--) {
-		mark_page_reserved(page);
-		page++;
-	}
+	empty_zero_page = (unsigned long)memblock_alloc_or_panic(PAGE_SIZE << order, PAGE_SIZE);
 
 	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
 }
@@ -92,41 +92,12 @@ static void __init setup_zero_pages(void)
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	unsigned long pgd_type, asce_bits;
-	psw_t psw;
-
-	s390_invalid_asce  = (unsigned long)invalid_pg_dir;
-	s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
-	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
-	init_mm.pgd = swapper_pg_dir;
-	if (VMALLOC_END > _REGION2_SIZE) {
-		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
-		pgd_type = _REGION2_ENTRY_EMPTY;
-	} else {
-		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
-		pgd_type = _REGION3_ENTRY_EMPTY;
-	}
-	init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
-	S390_lowcore.kernel_asce = init_mm.context.asce;
-	S390_lowcore.user_asce = s390_invalid_asce;
-	crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
-	vmem_map_init();
-	kasan_copy_shadow_mapping();
-
-	/* enable virtual mapping in kernel mode */
-	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-	__ctl_load(S390_lowcore.user_asce, 7, 7);
-	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
-	psw.mask = __extract_psw();
-	psw_bits(psw).dat = 1;
-	psw_bits(psw).as = PSW_BITS_AS_HOME;
-	__load_psw_mask(psw.mask);
-	kasan_free_early_identity();
 
+	vmem_map_init();
 	sparse_init();
-	zone_dma_bits = 31;
+	zone_dma_limit = DMA_BIT_MASK(31);
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+	max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 	free_area_init(max_zone_pfns);
 }
@@ -135,30 +106,31 @@ void mark_rodata_ro(void)
 {
 	unsigned long size = __end_ro_after_init - __start_ro_after_init;
 
-	set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+	if (cpu_has_nx())
+		system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
+	__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
 	pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
-	debug_checkwx();
 }
 
-int set_memory_encrypted(unsigned long addr, int numpages)
+int set_memory_encrypted(unsigned long vaddr, int numpages)
 {
 	int i;
 
 	/* make specified pages unshared, (swiotlb, dma_free) */
 	for (i = 0; i < numpages; ++i) {
-		uv_remove_shared(addr);
-		addr += PAGE_SIZE;
+		uv_remove_shared(virt_to_phys((void *)vaddr));
+		vaddr += PAGE_SIZE;
 	}
 	return 0;
 }
 
-int set_memory_decrypted(unsigned long addr, int numpages)
+int set_memory_decrypted(unsigned long vaddr, int numpages)
 {
 	int i;
 	/* make specified pages shared (swiotlb, dma_alloca) */
 	for (i = 0; i < numpages; ++i) {
-		uv_set_shared(addr);
-		addr += PAGE_SIZE;
+		uv_set_shared(virt_to_phys((void *)vaddr));
+		vaddr += PAGE_SIZE;
 	}
 	return 0;
 }
@@ -175,42 +147,21 @@ static void pv_init(void)
 	if (!is_prot_virt_guest())
 		return;
 
-	platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
+	virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
 
 	/* make sure bounce buffers are shared */
 	swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
 	swiotlb_update_mem_attributes();
 }
 
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
 {
 	cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 
-	set_max_mapnr(max_low_pfn);
-        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-
 	pv_init();
-	kfence_split_mapping();
-	/* Setup guest page hinting */
-	cmma_init();
 
-	/* this will put all low memory onto the freelists */
-	memblock_free_all();
 	setup_zero_pages();	/* Setup zeroed pages. */
-
-	cmma_init_nodat();
-}
-
-void free_initmem(void)
-{
-	__set_memory((unsigned long)_sinittext,
-		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
-		     SET_MEMORY_RW | SET_MEMORY_NX);
-	free_reserved_area(sclp_early_sccb,
-			   sclp_early_sccb + EXT_SCCB_READ_SCP,
-			   POISON_FREE_INITMEM, "unused early sccb");
-	free_initmem_default(POISON_FREE_INITMEM);
 }
 
 unsigned long memory_block_size_bytes(void)
@@ -222,6 +173,41 @@ unsigned long memory_block_size_bytes(void)
 	return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
 }
 
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	return LOCAL_DISTANCE;
+}
+
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return 0;
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc;
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+				    pcpu_cpu_distance,
+				    pcpu_cpu_to_node);
+	if (rc < 0)
+		panic("Failed to initialize percpu areas.");
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 #ifdef CONFIG_CMA
@@ -236,16 +222,13 @@ struct s390_cma_mem_data {
 static int s390_cma_check_range(struct cma *cma, void *data)
 {
 	struct s390_cma_mem_data *mem_data;
-	unsigned long start, end;
 
 	mem_data = data;
-	start = cma_get_base(cma);
-	end = start + cma_get_size(cma);
-	if (end < mem_data->start)
-		return 0;
-	if (start >= mem_data->end)
-		return 0;
-	return -EBUSY;
+
+	if (cma_intersects(cma, mem_data->start, mem_data->end))
+		return -EBUSY;
+
+	return 0;
 }
 
 static int s390_cma_mem_notifier(struct notifier_block *nb,
@@ -282,10 +265,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	unsigned long size_pages = PFN_DOWN(size);
 	int rc;
 
-	if (WARN_ON_ONCE(params->altmap))
-		return -EINVAL;
-
-	if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
+	if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL)))
 		return -EINVAL;
 
 	VM_BUG_ON(!mhp_range_allowed(start, size, true));
@@ -308,3 +288,32 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 	vmem_remove_mapping(start, size);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+	unsigned long module_load_offset = 0;
+	unsigned long start;
+
+	if (kaslr_enabled())
+		module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+
+	start = MODULES_VADDR + module_load_offset;
+
+	execmem_info = (struct execmem_info){
+		.ranges = {
+			[EXECMEM_DEFAULT] = {
+				.flags	= EXECMEM_KASAN_SHADOW,
+				.start	= start,
+				.end	= MODULES_END,
+				.pgprot	= PAGE_KERNEL,
+				.alignment = MODULE_ALIGN,
+			},
+		},
+	};
+
+	return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
deleted file mode 100644
index 9f988d4582ed..000000000000
--- a/arch/s390/mm/kasan_init.c
+++ /dev/null
@@ -1,403 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kasan.h>
-#include <linux/sched/task.h>
-#include <linux/memblock.h>
-#include <linux/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/kasan.h>
-#include <asm/mem_detect.h>
-#include <asm/processor.h>
-#include <asm/sclp.h>
-#include <asm/facility.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <asm/uv.h>
-
-static unsigned long segment_pos __initdata;
-static unsigned long segment_low __initdata;
-static unsigned long pgalloc_pos __initdata;
-static unsigned long pgalloc_low __initdata;
-static unsigned long pgalloc_freeable __initdata;
-static bool has_edat __initdata;
-static bool has_nx __initdata;
-
-#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-
-static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-
-static void __init kasan_early_panic(const char *reason)
-{
-	sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
-	sclp_early_printk(reason);
-	disabled_wait();
-}
-
-static void * __init kasan_early_alloc_segment(void)
-{
-	segment_pos -= _SEGMENT_SIZE;
-
-	if (segment_pos < segment_low)
-		kasan_early_panic("out of memory during initialisation\n");
-
-	return (void *)segment_pos;
-}
-
-static void * __init kasan_early_alloc_pages(unsigned int order)
-{
-	pgalloc_pos -= (PAGE_SIZE << order);
-
-	if (pgalloc_pos < pgalloc_low)
-		kasan_early_panic("out of memory during initialisation\n");
-
-	return (void *)pgalloc_pos;
-}
-
-static void * __init kasan_early_crst_alloc(unsigned long val)
-{
-	unsigned long *table;
-
-	table = kasan_early_alloc_pages(CRST_ALLOC_ORDER);
-	if (table)
-		crst_table_init(table, val);
-	return table;
-}
-
-static pte_t * __init kasan_early_pte_alloc(void)
-{
-	static void *pte_leftover;
-	pte_t *pte;
-
-	BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
-
-	if (!pte_leftover) {
-		pte_leftover = kasan_early_alloc_pages(0);
-		pte = pte_leftover + _PAGE_TABLE_SIZE;
-	} else {
-		pte = pte_leftover;
-		pte_leftover = NULL;
-	}
-	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
-	return pte;
-}
-
-enum populate_mode {
-	POPULATE_ONE2ONE,
-	POPULATE_MAP,
-	POPULATE_ZERO_SHADOW,
-	POPULATE_SHALLOW
-};
-static void __init kasan_early_pgtable_populate(unsigned long address,
-						unsigned long end,
-						enum populate_mode mode)
-{
-	unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-
-	pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
-	if (!has_nx)
-		pgt_prot_zero &= ~_PAGE_NOEXEC;
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	if (!has_nx || mode == POPULATE_ONE2ONE) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-	}
-
-	/*
-	 * The first 1MB of 1:1 mapping is mapped with 4KB pages
-	 */
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			if (mode == POPULATE_ZERO_SHADOW &&
-			    IS_ALIGNED(address, PGDIR_SIZE) &&
-			    end - address >= PGDIR_SIZE) {
-				pgd_populate(&init_mm, pg_dir,
-						kasan_early_shadow_p4d);
-				address = (address + PGDIR_SIZE) & PGDIR_MASK;
-				continue;
-			}
-			p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY);
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-
-		if (mode == POPULATE_SHALLOW) {
-			address = (address + P4D_SIZE) & P4D_MASK;
-			continue;
-		}
-
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			if (mode == POPULATE_ZERO_SHADOW &&
-			    IS_ALIGNED(address, P4D_SIZE) &&
-			    end - address >= P4D_SIZE) {
-				p4d_populate(&init_mm, p4_dir,
-						kasan_early_shadow_pud);
-				address = (address + P4D_SIZE) & P4D_MASK;
-				continue;
-			}
-			pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY);
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			if (mode == POPULATE_ZERO_SHADOW &&
-			    IS_ALIGNED(address, PUD_SIZE) &&
-			    end - address >= PUD_SIZE) {
-				pud_populate(&init_mm, pu_dir,
-						kasan_early_shadow_pmd);
-				address = (address + PUD_SIZE) & PUD_MASK;
-				continue;
-			}
-			pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			if (IS_ALIGNED(address, PMD_SIZE) &&
-			    end - address >= PMD_SIZE) {
-				if (mode == POPULATE_ZERO_SHADOW) {
-					pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte);
-					address = (address + PMD_SIZE) & PMD_MASK;
-					continue;
-				} else if (has_edat && address) {
-					void *page;
-
-					if (mode == POPULATE_ONE2ONE) {
-						page = (void *)address;
-					} else {
-						page = kasan_early_alloc_segment();
-						memset(page, 0, _SEGMENT_SIZE);
-					}
-					set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot));
-					address = (address + PMD_SIZE) & PMD_MASK;
-					continue;
-				}
-			}
-			pt_dir = kasan_early_pte_alloc();
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		} else if (pmd_large(*pm_dir)) {
-			address = (address + PMD_SIZE) & PMD_MASK;
-			continue;
-		}
-
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		if (pte_none(*pt_dir)) {
-			void *page;
-
-			switch (mode) {
-			case POPULATE_ONE2ONE:
-				page = (void *)address;
-				set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
-				break;
-			case POPULATE_MAP:
-				page = kasan_early_alloc_pages(0);
-				memset(page, 0, PAGE_SIZE);
-				set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
-				break;
-			case POPULATE_ZERO_SHADOW:
-				page = kasan_early_shadow_page;
-				set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero));
-				break;
-			case POPULATE_SHALLOW:
-				/* should never happen */
-				break;
-			}
-		}
-		address += PAGE_SIZE;
-	}
-}
-
-static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
-{
-	unsigned long asce_bits;
-
-	asce_bits = asce_type | _ASCE_TABLE_LENGTH;
-	S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
-	S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
-	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
-	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
-}
-
-static void __init kasan_enable_dat(void)
-{
-	psw_t psw;
-
-	psw.mask = __extract_psw();
-	psw_bits(psw).dat = 1;
-	psw_bits(psw).as = PSW_BITS_AS_HOME;
-	__load_psw_mask(psw.mask);
-}
-
-static void __init kasan_early_detect_facilities(void)
-{
-	if (test_facility(8)) {
-		has_edat = true;
-		__ctl_set_bit(0, 23);
-	}
-	if (!noexec_disabled && test_facility(130)) {
-		has_nx = true;
-		__ctl_set_bit(0, 20);
-	}
-}
-
-void __init kasan_early_init(void)
-{
-	unsigned long shadow_alloc_size;
-	unsigned long initrd_end;
-	unsigned long memsize;
-	unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
-	pte_t pte_z;
-	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
-	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
-	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
-
-	kasan_early_detect_facilities();
-	if (!has_nx)
-		pgt_prot &= ~_PAGE_NOEXEC;
-	pte_z = __pte(__pa(kasan_early_shadow_page) | pgt_prot);
-
-	memsize = get_mem_detect_end();
-	if (!memsize)
-		kasan_early_panic("cannot detect physical memory size\n");
-	/*
-	 * Kasan currently supports standby memory but only if it follows
-	 * online memory (default allocation), i.e. no memory holes.
-	 * - memsize represents end of online memory
-	 * - ident_map_size represents online + standby and memory limits
-	 *   accounted.
-	 * Kasan maps "memsize" right away.
-	 * [0, memsize]			- as identity mapping
-	 * [__sha(0), __sha(memsize)]	- shadow memory for identity mapping
-	 * The rest [memsize, ident_map_size] if memsize < ident_map_size
-	 * could be mapped/unmapped dynamically later during memory hotplug.
-	 */
-	memsize = min(memsize, ident_map_size);
-
-	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
-	crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY);
-
-	/* init kasan zero shadow */
-	crst_table_init((unsigned long *)kasan_early_shadow_p4d,
-				p4d_val(p4d_z));
-	crst_table_init((unsigned long *)kasan_early_shadow_pud,
-				pud_val(pud_z));
-	crst_table_init((unsigned long *)kasan_early_shadow_pmd,
-				pmd_val(pmd_z));
-	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
-
-	shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
-	pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
-	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
-		initrd_end =
-		    round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
-		pgalloc_low = max(pgalloc_low, initrd_end);
-	}
-
-	if (pgalloc_low + shadow_alloc_size > memsize)
-		kasan_early_panic("out of memory during initialisation\n");
-
-	if (has_edat) {
-		segment_pos = round_down(memsize, _SEGMENT_SIZE);
-		segment_low = segment_pos - shadow_alloc_size;
-		pgalloc_pos = segment_low;
-	} else {
-		pgalloc_pos = memsize;
-	}
-	init_mm.pgd = early_pg_dir;
-	/*
-	 * Current memory layout:
-	 * +- 0 -------------+	   +- shadow start -+
-	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
-	 * |		     |	 / |		    |
-	 * +- end of ram ----+	/  +----------------+
-	 * | ... gap ...     | /   |		    |
-	 * |		     |/    |	kasan	    |
-	 * +- shadow start --+	   |	zero	    |
-	 * | 1/8 addr space  |	   |	page	    |
-	 * +- shadow end    -+	   |	mapping	    |
-	 * | ... gap ...     |\    |  (untracked)   |
-	 * +- vmalloc area  -+ \   |		    |
-	 * | vmalloc_size    |	\  |		    |
-	 * +- modules vaddr -+	 \ +----------------+
-	 * | 2Gb	     |	  \|	  unmapped  | allocated per module
-	 * +-----------------+	   +- shadow end ---+
-	 *
-	 * Current memory layout (KASAN_VMALLOC):
-	 * +- 0 -------------+	   +- shadow start -+
-	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
-	 * |		     |	 / |		    |
-	 * +- end of ram ----+	/  +----------------+
-	 * | ... gap ...     | /   |	kasan	    |
-	 * |		     |/    |	zero	    |
-	 * +- shadow start --+	   |	page	    |
-	 * | 1/8 addr space  |	   |	mapping     |
-	 * +- shadow end    -+	   |  (untracked)   |
-	 * | ... gap ...     |\    |		    |
-	 * +- vmalloc area  -+ \   +- vmalloc area -+
-	 * | vmalloc_size    |	\  |shallow populate|
-	 * +- modules vaddr -+	 \ +- modules area -+
-	 * | 2Gb	     |	  \|shallow populate|
-	 * +-----------------+	   +- shadow end ---+
-	 */
-	/* populate kasan shadow (for identity mapping and zero page mapping) */
-	kasan_early_pgtable_populate(__sha(0), __sha(memsize), POPULATE_MAP);
-	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
-		/* shallowly populate kasan shadow for vmalloc and modules */
-		kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END),
-					     POPULATE_SHALLOW);
-	}
-	/* populate kasan shadow for untracked memory */
-	kasan_early_pgtable_populate(__sha(ident_map_size),
-				     IS_ENABLED(CONFIG_KASAN_VMALLOC) ?
-						   __sha(VMALLOC_START) :
-						   __sha(MODULES_VADDR),
-				     POPULATE_ZERO_SHADOW);
-	kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE),
-				     POPULATE_ZERO_SHADOW);
-	/* memory allocated for identity mapping structs will be freed later */
-	pgalloc_freeable = pgalloc_pos;
-	/* populate identity mapping */
-	kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE);
-	kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2);
-	kasan_enable_dat();
-	/* enable kasan */
-	init_task.kasan_depth = 0;
-	memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
-	sclp_early_printk("KernelAddressSanitizer initialized\n");
-}
-
-void __init kasan_copy_shadow_mapping(void)
-{
-	/*
-	 * At this point we are still running on early pages setup early_pg_dir,
-	 * while swapper_pg_dir has just been initialized with identity mapping.
-	 * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
-	 */
-
-	pgd_t *pg_dir_src;
-	pgd_t *pg_dir_dst;
-	p4d_t *p4_dir_src;
-	p4d_t *p4_dir_dst;
-
-	pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
-	pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START);
-	p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
-	p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
-	memcpy(p4_dir_dst, p4_dir_src,
-	       (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
-}
-
-void __init kasan_free_early_identity(void)
-{
-	memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
-}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 421efa46946b..44426e0f2944 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -12,10 +12,18 @@
 #include <linux/errno.h>
 #include <linux/gfp.h>
 #include <linux/cpu.h>
+#include <linux/uio.h>
+#include <linux/io.h>
 #include <asm/asm-extable.h>
-#include <asm/ctl_reg.h>
-#include <asm/io.h>
+#include <asm/abs_lowcore.h>
 #include <asm/stacktrace.h>
+#include <asm/sections.h>
+#include <asm/maccess.h>
+#include <asm/ctlreg.h>
+
+unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
+static DEFINE_MUTEX(memcpy_real_mutex);
 
 static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
 {
@@ -41,7 +49,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
 }
 
 /*
- * s390_kernel_write - write to kernel memory bypassing DAT
+ * __s390_kernel_write - write to kernel memory bypassing DAT
  * @dst: destination address
  * @src: source address
  * @size: number of bytes to copy
@@ -54,166 +62,84 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
  */
 static DEFINE_SPINLOCK(s390_kernel_write_lock);
 
-notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
+notrace void *__s390_kernel_write(void *dst, const void *src, size_t size)
 {
 	void *tmp = dst;
 	unsigned long flags;
 	long copied;
 
 	spin_lock_irqsave(&s390_kernel_write_lock, flags);
-	if (!(flags & PSW_MASK_DAT)) {
-		memcpy(dst, src, size);
-	} else {
-		while (size) {
-			copied = s390_kernel_write_odd(tmp, src, size);
-			tmp += copied;
-			src += copied;
-			size -= copied;
-		}
+	while (size) {
+		copied = s390_kernel_write_odd(tmp, src, size);
+		tmp += copied;
+		src += copied;
+		size -= copied;
 	}
 	spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
 
 	return dst;
 }
 
-static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
-{
-	union register_pair _dst, _src;
-	int rc = -EFAULT;
-
-	_dst.even = (unsigned long) dest;
-	_dst.odd  = (unsigned long) count;
-	_src.even = (unsigned long) src;
-	_src.odd  = (unsigned long) count;
-	asm volatile (
-		"0:	mvcle	%[dst],%[src],0\n"
-		"1:	jo	0b\n"
-		"	lhi	%[rc],0\n"
-		"2:\n"
-		EX_TABLE(1b,2b)
-		: [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair)
-		: : "cc", "memory");
-	return rc;
-}
-
-static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
-							unsigned long src,
-							unsigned long count)
-{
-	int irqs_disabled, rc;
-	unsigned long flags;
-
-	if (!count)
-		return 0;
-	flags = arch_local_irq_save();
-	irqs_disabled = arch_irqs_disabled_flags(flags);
-	if (!irqs_disabled)
-		trace_hardirqs_off();
-	__arch_local_irq_stnsm(0xf8); // disable DAT
-	rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
-	if (flags & PSW_MASK_DAT)
-		__arch_local_irq_stosm(0x04); // enable DAT
-	if (!irqs_disabled)
-		trace_hardirqs_on();
-	__arch_local_irq_ssm(flags);
-	return rc;
-}
-
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, unsigned long src, size_t count)
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
 {
-	unsigned long _dest  = (unsigned long)dest;
-	unsigned long _src   = (unsigned long)src;
-	unsigned long _count = (unsigned long)count;
-	int rc;
-
-	if (S390_lowcore.nodat_stack != 0) {
-		preempt_disable();
-		rc = call_on_stack(3, S390_lowcore.nodat_stack,
-				   unsigned long, _memcpy_real,
-				   unsigned long, _dest,
-				   unsigned long, _src,
-				   unsigned long, _count);
-		preempt_enable();
-		return rc;
-	}
-	/*
-	 * This is a really early memcpy_real call, the stacks are
-	 * not set up yet. Just call _memcpy_real on the early boot
-	 * stack
-	 */
-	return _memcpy_real(_dest, _src, _count);
-}
-
-/*
- * Copy memory in absolute mode (kernel to kernel)
- */
-void memcpy_absolute(void *dest, void *src, size_t count)
-{
-	unsigned long cr0, flags, prefix;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_clear_bit(0, 28); /* disable lowcore protection */
-	prefix = store_prefix();
-	if (prefix) {
-		local_mcck_disable();
-		set_prefix(0);
-		memcpy(dest, src, count);
-		set_prefix(prefix);
-		local_mcck_enable();
-	} else {
-		memcpy(dest, src, count);
+	size_t len, copied, res = 0;
+	unsigned long phys, offset;
+	void *chunk;
+	pte_t pte;
+
+	BUILD_BUG_ON(MEMCPY_REAL_SIZE != PAGE_SIZE);
+	while (count) {
+		phys = src & MEMCPY_REAL_MASK;
+		offset = src & ~MEMCPY_REAL_MASK;
+		chunk = (void *)(__memcpy_real_area + offset);
+		len = min(count, MEMCPY_REAL_SIZE - offset);
+		pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
+
+		mutex_lock(&memcpy_real_mutex);
+		if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
+			__ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
+			set_pte(memcpy_real_ptep, pte);
+		}
+		copied = copy_to_iter(chunk, len, iter);
+		mutex_unlock(&memcpy_real_mutex);
+
+		count -= copied;
+		src += copied;
+		res += copied;
+		if (copied < len)
+			break;
 	}
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
+	return res;
 }
 
-/*
- * Copy memory from kernel (real) to user (virtual)
- */
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count)
+int memcpy_real(void *dest, unsigned long src, size_t count)
 {
-	int offs = 0, size, rc;
-	char *buf;
-
-	buf = (char *) __get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	rc = -EFAULT;
-	while (offs < count) {
-		size = min(PAGE_SIZE, count - offs);
-		if (memcpy_real(buf, src + offs, size))
-			goto out;
-		if (copy_to_user(dest + offs, buf, size))
-			goto out;
-		offs += size;
-	}
-	rc = 0;
-out:
-	free_page((unsigned long) buf);
-	return rc;
+	struct iov_iter iter;
+	struct kvec kvec;
+
+	kvec.iov_base = dest;
+	kvec.iov_len = count;
+	iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
+	if (memcpy_real_iter(&iter, src, count) < count)
+		return -EFAULT;
+	return 0;
 }
 
 /*
- * Check if physical address is within prefix or zero page
+ * Find CPU that owns swapped prefix page
  */
-static int is_swapped(phys_addr_t addr)
+static int get_swapped_owner(phys_addr_t addr)
 {
 	phys_addr_t lc;
 	int cpu;
 
-	if (addr < sizeof(struct lowcore))
-		return 1;
 	for_each_online_cpu(cpu) {
 		lc = virt_to_phys(lowcore_ptr[cpu]);
 		if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
 			continue;
-		return 1;
+		return cpu;
 	}
-	return 0;
+	return -1;
 }
 
 /*
@@ -226,17 +152,34 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
 {
 	void *ptr = phys_to_virt(addr);
 	void *bounce = ptr;
+	struct lowcore *abs_lc;
 	unsigned long size;
+	int this_cpu, cpu;
 
 	cpus_read_lock();
-	preempt_disable();
-	if (is_swapped(addr)) {
-		size = PAGE_SIZE - (addr & ~PAGE_MASK);
-		bounce = (void *) __get_free_page(GFP_ATOMIC);
-		if (bounce)
-			memcpy_absolute(bounce, ptr, size);
+	this_cpu = get_cpu();
+	if (addr >= sizeof(struct lowcore)) {
+		cpu = get_swapped_owner(addr);
+		if (cpu < 0)
+			goto out;
+	}
+	bounce = (void *)__get_free_page(GFP_ATOMIC);
+	if (!bounce)
+		goto out;
+	size = PAGE_SIZE - (addr & ~PAGE_MASK);
+	if (addr < sizeof(struct lowcore)) {
+		abs_lc = get_abs_lowcore();
+		ptr = (void *)abs_lc + addr;
+		memcpy(bounce, ptr, size);
+		put_abs_lowcore(abs_lc);
+	} else if (cpu == this_cpu) {
+		ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
+		memcpy(bounce, ptr, size);
+	} else {
+		memcpy(bounce, ptr, size);
 	}
-	preempt_enable();
+out:
+	put_cpu();
 	cpus_read_unlock();
 	return bounce;
 }
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index d545f5c39f7e..40a526d28184 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -17,6 +17,7 @@
 #include <linux/random.h>
 #include <linux/compat.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <asm/elf.h>
 
 static unsigned long stack_maxrandom_size(void)
@@ -37,7 +38,7 @@ static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 
 unsigned long arch_mmap_rnd(void)
 {
-	return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
+	return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT;
 }
 
 static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -50,7 +51,6 @@ static inline unsigned long mmap_base(unsigned long rnd,
 {
 	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
-	unsigned long gap_min, gap_max;
 
 	/* Values close to RLIM_INFINITY can overflow. */
 	if (gap + pad > gap)
@@ -60,24 +60,29 @@ static inline unsigned long mmap_base(unsigned long rnd,
 	 * Top of mmap area (just below the process stack).
 	 * Leave at least a ~128 MB hole.
 	 */
-	gap_min = SZ_128M;
-	gap_max = (STACK_TOP / 6) * 5;
-
-	if (gap < gap_min)
-		gap = gap_min;
-	else if (gap > gap_max)
-		gap = gap_max;
+	gap = clamp(gap, SZ_128M, (STACK_TOP / 6) * 5);
 
 	return PAGE_ALIGN(STACK_TOP - gap - rnd);
 }
 
+static int get_align_mask(struct file *filp, unsigned long flags)
+{
+	if (filp && is_file_hugepages(filp))
+		return huge_page_mask_align(filp);
+	if (!(current->flags & PF_RANDOMIZE))
+		return 0;
+	if (filp || (flags & MAP_SHARED))
+		return MMAP_ALIGN_MASK << PAGE_SHIFT;
+	return 0;
+}
+
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 				     unsigned long len, unsigned long pgoff,
-				     unsigned long flags)
+				     unsigned long flags, vm_flags_t vm_flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	struct vm_unmapped_area_info info;
+	struct vm_unmapped_area_info info = {};
 
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
@@ -93,15 +98,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			goto check_asce_limit;
 	}
 
-	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
 	info.high_limit = TASK_SIZE;
-	if (filp || (flags & MAP_SHARED))
-		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
-	else
-		info.align_mask = 0;
-	info.align_offset = pgoff << PAGE_SHIFT;
+	info.align_mask = get_align_mask(filp, flags);
+	if (!(filp && is_file_hugepages(filp)))
+		info.align_offset = pgoff << PAGE_SHIFT;
 	addr = vm_unmapped_area(&info);
 	if (offset_in_page(addr))
 		return addr;
@@ -112,11 +114,11 @@ check_asce_limit:
 
 unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 					     unsigned long len, unsigned long pgoff,
-					     unsigned long flags)
+					     unsigned long flags, vm_flags_t vm_flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	struct vm_unmapped_area_info info;
+	struct vm_unmapped_area_info info = {};
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -136,13 +138,11 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
-	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+	info.low_limit = PAGE_SIZE;
 	info.high_limit = mm->mmap_base;
-	if (filp || (flags & MAP_SHARED))
-		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
-	else
-		info.align_mask = 0;
-	info.align_offset = pgoff << PAGE_SHIFT;
+	info.align_mask = get_align_mask(filp, flags);
+	if (!(filp && is_file_hugepages(filp)))
+		info.align_offset = pgoff << PAGE_SHIFT;
 	addr = vm_unmapped_area(&info);
 
 	/*
@@ -182,9 +182,35 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 	 */
 	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = mmap_base_legacy(random_factor);
-		mm->get_unmapped_area = arch_get_unmapped_area;
+		clear_bit(MMF_TOPDOWN, &mm->flags);
 	} else {
 		mm->mmap_base = mmap_base(random_factor, rlim_stack);
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		set_bit(MMF_TOPDOWN, &mm->flags);
 	}
 }
+
+static pgprot_t protection_map[16] __ro_after_init;
+
+void __init setup_protection_map(void)
+{
+	pgprot_t *pm = protection_map;
+
+	pm[VM_NONE]					= PAGE_NONE;
+	pm[VM_READ]					= PAGE_RO;
+	pm[VM_WRITE]					= PAGE_RO;
+	pm[VM_WRITE | VM_READ]				= PAGE_RO;
+	pm[VM_EXEC]					= PAGE_RX;
+	pm[VM_EXEC | VM_READ]				= PAGE_RX;
+	pm[VM_EXEC | VM_WRITE]				= PAGE_RX;
+	pm[VM_EXEC | VM_WRITE | VM_READ]		= PAGE_RX;
+	pm[VM_SHARED]					= PAGE_NONE;
+	pm[VM_SHARED | VM_READ]				= PAGE_RO;
+	pm[VM_SHARED | VM_WRITE]			= PAGE_RW;
+	pm[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_RW;
+	pm[VM_SHARED | VM_EXEC]				= PAGE_RX;
+	pm[VM_SHARED | VM_EXEC | VM_READ]		= PAGE_RX;
+	pm[VM_SHARED | VM_EXEC | VM_WRITE]		= PAGE_RWX;
+	pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= PAGE_RWX;
+}
+
+DECLARE_VM_GET_PAGE_PROT
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index d5ea09d78938..01f9b39e65f5 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -7,210 +7,18 @@
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <asm/asm-extable.h>
-#include <asm/facility.h>
 #include <asm/page-states.h>
+#include <asm/sections.h>
+#include <asm/page.h>
 
-static int cmma_flag = 1;
-
-static int __init cmma(char *str)
-{
-	bool enabled;
-
-	if (!kstrtobool(str, &enabled))
-		cmma_flag = enabled;
-	return 1;
-}
-__setup("cmma=", cmma);
-
-static inline int cmma_test_essa(void)
-{
-	unsigned long tmp = 0;
-	int rc = -EOPNOTSUPP;
-
-	/* test ESSA_GET_STATE */
-	asm volatile(
-		"	.insn	rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
-		"0:     la      %[rc],0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: [rc] "+&d" (rc), [tmp] "+&d" (tmp)
-		: [cmd] "i" (ESSA_GET_STATE));
-	return rc;
-}
-
-void __init cmma_init(void)
-{
-	if (!cmma_flag)
-		return;
-	if (cmma_test_essa()) {
-		cmma_flag = 0;
-		return;
-	}
-	if (test_facility(147))
-		cmma_flag = 2;
-}
-
-static inline unsigned char get_page_state(struct page *page)
-{
-	unsigned char state;
-
-	asm volatile("	.insn	rrf,0xb9ab0000,%0,%1,%2,0"
-		     : "=&d" (state)
-		     : "a" (page_to_phys(page)),
-		       "i" (ESSA_GET_STATE));
-	return state & 0x3f;
-}
-
-static inline void set_page_unused(struct page *page, int order)
-{
-	int i, rc;
-
-	for (i = 0; i < (1 << order); i++)
-		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
-			     : "=&d" (rc)
-			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_UNUSED));
-}
-
-static inline void set_page_stable_dat(struct page *page, int order)
-{
-	int i, rc;
-
-	for (i = 0; i < (1 << order); i++)
-		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
-			     : "=&d" (rc)
-			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_STABLE));
-}
-
-static inline void set_page_stable_nodat(struct page *page, int order)
-{
-	int i, rc;
-
-	for (i = 0; i < (1 << order); i++)
-		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
-			     : "=&d" (rc)
-			     : "a" (page_to_phys(page + i)),
-			       "i" (ESSA_SET_STABLE_NODAT));
-}
-
-static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	struct page *page;
-	pmd_t *pmd;
-
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none(*pmd) || pmd_large(*pmd))
-			continue;
-		page = phys_to_page(pmd_val(*pmd));
-		set_bit(PG_arch_1, &page->flags);
-	} while (pmd++, addr = next, addr != end);
-}
-
-static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	struct page *page;
-	pud_t *pud;
-	int i;
-
-	pud = pud_offset(p4d, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none(*pud) || pud_large(*pud))
-			continue;
-		if (!pud_folded(*pud)) {
-			page = phys_to_page(pud_val(*pud));
-			for (i = 0; i < 3; i++)
-				set_bit(PG_arch_1, &page[i].flags);
-		}
-		mark_kernel_pmd(pud, addr, next);
-	} while (pud++, addr = next, addr != end);
-}
-
-static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	struct page *page;
-	p4d_t *p4d;
-	int i;
-
-	p4d = p4d_offset(pgd, addr);
-	do {
-		next = p4d_addr_end(addr, end);
-		if (p4d_none(*p4d))
-			continue;
-		if (!p4d_folded(*p4d)) {
-			page = phys_to_page(p4d_val(*p4d));
-			for (i = 0; i < 3; i++)
-				set_bit(PG_arch_1, &page[i].flags);
-		}
-		mark_kernel_pud(p4d, addr, next);
-	} while (p4d++, addr = next, addr != end);
-}
-
-static void mark_kernel_pgd(void)
-{
-	unsigned long addr, next;
-	struct page *page;
-	pgd_t *pgd;
-	int i;
-
-	addr = 0;
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, MODULES_END);
-		if (pgd_none(*pgd))
-			continue;
-		if (!pgd_folded(*pgd)) {
-			page = phys_to_page(pgd_val(*pgd));
-			for (i = 0; i < 3; i++)
-				set_bit(PG_arch_1, &page[i].flags);
-		}
-		mark_kernel_p4d(pgd, addr, next);
-	} while (pgd++, addr = next, addr != MODULES_END);
-}
-
-void __init cmma_init_nodat(void)
-{
-	struct page *page;
-	unsigned long start, end, ix;
-	int i;
-
-	if (cmma_flag < 2)
-		return;
-	/* Mark pages used in kernel page tables */
-	mark_kernel_pgd();
-
-	/* Set all kernel pages not used for page tables to stable/no-dat */
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
-		page = pfn_to_page(start);
-		for (ix = start; ix < end; ix++, page++) {
-			if (__test_and_clear_bit(PG_arch_1, &page->flags))
-				continue;	/* skip page table pages */
-			if (!list_empty(&page->lru))
-				continue;	/* skip free pages */
-			set_page_stable_nodat(page, 0);
-		}
-	}
-}
+int __bootdata_preserved(cmma_flag);
 
 void arch_free_page(struct page *page, int order)
 {
 	if (!cmma_flag)
 		return;
-	set_page_unused(page, order);
+	__set_page_unused(page_to_virt(page), 1UL << order);
 }
 
 void arch_alloc_page(struct page *page, int order)
@@ -218,14 +26,7 @@ void arch_alloc_page(struct page *page, int order)
 	if (!cmma_flag)
 		return;
 	if (cmma_flag < 2)
-		set_page_stable_dat(page, order);
+		__set_page_stable_dat(page_to_virt(page), 1UL << order);
 	else
-		set_page_stable_nodat(page, order);
-}
-
-void arch_set_page_dat(struct page *page, int order)
-{
-	if (!cmma_flag)
-		return;
-	set_page_stable_dat(page, order);
+		__set_page_stable_nodat(page_to_virt(page), 1UL << order);
 }
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 85195c18b2e8..348e759840e7 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -3,13 +3,17 @@
  * Copyright IBM Corp. 2011
  * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
  */
+#include <linux/cpufeature.h>
 #include <linux/hugetlb.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <asm/cacheflush.h>
 #include <asm/facility.h>
 #include <asm/pgalloc.h>
 #include <asm/kfence.h>
 #include <asm/page.h>
+#include <asm/asm.h>
 #include <asm/set_memory.h>
 
 static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
@@ -24,7 +28,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 	unsigned long boundary, size;
 
 	while (start < end) {
-		if (MACHINE_HAS_EDAT1) {
+		if (cpu_has_edat1()) {
 			/* set storage keys for a 1MB frame */
 			size = 1UL << 20;
 			boundary = (start + size) & ~(size - 1);
@@ -41,7 +45,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
 }
 
 #ifdef CONFIG_PROC_FS
-atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
+atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
 
 void arch_report_meminfo(struct seq_file *m)
 {
@@ -60,7 +64,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
 	unsigned long *table, mask;
 
 	mask = 0;
-	if (MACHINE_HAS_EDAT2) {
+	if (cpu_has_edat2()) {
 		switch (dtt) {
 		case CRDTE_DTT_REGION3:
 			mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
@@ -73,8 +77,8 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
 			break;
 		}
 		table = (unsigned long *)((unsigned long)old & mask);
-		crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
-	} else if (MACHINE_HAS_IDTE) {
+		crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val);
+	} else if (cpu_has_idte()) {
 		cspg(old, *old, new);
 	} else {
 		csp((unsigned int *)old + 1, *old, new);
@@ -96,11 +100,17 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
 		if (flags & SET_MEMORY_RO)
 			new = pte_wrprotect(new);
 		else if (flags & SET_MEMORY_RW)
-			new = pte_mkwrite(pte_mkdirty(new));
+			new = pte_mkwrite_novma(pte_mkdirty(new));
 		if (flags & SET_MEMORY_NX)
 			new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
 		else if (flags & SET_MEMORY_X)
 			new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
+		if (flags & SET_MEMORY_INV) {
+			new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
+		} else if (flags & SET_MEMORY_DEF) {
+			new = __pte(pte_val(new) & PAGE_MASK);
+			new = set_pte_bit(new, PAGE_KERNEL);
+		}
 		pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
 		ptep++;
 		addr += PAGE_SIZE;
@@ -146,11 +156,17 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
 	if (flags & SET_MEMORY_RO)
 		new = pmd_wrprotect(new);
 	else if (flags & SET_MEMORY_RW)
-		new = pmd_mkwrite(pmd_mkdirty(new));
+		new = pmd_mkwrite_novma(pmd_mkdirty(new));
 	if (flags & SET_MEMORY_NX)
 		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
 	else if (flags & SET_MEMORY_X)
 		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+	if (flags & SET_MEMORY_INV) {
+		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
+	} else if (flags & SET_MEMORY_DEF) {
+		new = __pmd(pmd_val(new) & PMD_MASK);
+		new = set_pmd_bit(new, SEGMENT_KERNEL);
+	}
 	pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
 }
 
@@ -167,7 +183,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
 		if (pmd_none(*pmdp))
 			return -EINVAL;
 		next = pmd_addr_end(addr, end);
-		if (pmd_large(*pmdp)) {
+		if (pmd_leaf(*pmdp)) {
 			need_split  = !!(flags & SET_MEMORY_4K);
 			need_split |= !!(addr & ~PMD_MASK);
 			need_split |= !!(addr + PMD_SIZE > next);
@@ -232,6 +248,12 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
 		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
 	else if (flags & SET_MEMORY_X)
 		new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
+	if (flags & SET_MEMORY_INV) {
+		new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
+	} else if (flags & SET_MEMORY_DEF) {
+		new = __pud(pud_val(new) & PUD_MASK);
+		new = set_pud_bit(new, REGION3_KERNEL);
+	}
 	pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
 }
 
@@ -248,7 +270,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
 		if (pud_none(*pudp))
 			return -EINVAL;
 		next = pud_addr_end(addr, end);
-		if (pud_large(*pudp)) {
+		if (pud_leaf(*pudp)) {
 			need_split  = !!(flags & SET_MEMORY_4K);
 			need_split |= !!(addr & ~PUD_MASK);
 			need_split |= !!(addr + PUD_SIZE > next);
@@ -298,11 +320,6 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 	int rc = -EINVAL;
 	pgd_t *pgdp;
 
-	if (addr == end)
-		return 0;
-	if (end >= MODULES_END)
-		return -EINVAL;
-	mutex_lock(&cpa_mutex);
 	pgdp = pgd_offset_k(addr);
 	do {
 		if (pgd_none(*pgdp))
@@ -313,18 +330,103 @@ static int change_page_attr(unsigned long addr, unsigned long end,
 			break;
 		cond_resched();
 	} while (pgdp++, addr = next, addr < end && !rc);
-	mutex_unlock(&cpa_mutex);
 	return rc;
 }
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags)
+static int change_page_attr_alias(unsigned long addr, unsigned long end,
+				  unsigned long flags)
 {
-	if (!MACHINE_HAS_NX)
+	unsigned long alias, offset, va_start, va_end;
+	struct vm_struct *area;
+	int rc = 0;
+
+	/*
+	 * Changes to read-only permissions on kernel VA mappings are also
+	 * applied to the kernel direct mapping. Execute permissions are
+	 * intentionally not transferred to keep all allocated pages within
+	 * the direct mapping non-executable.
+	 */
+	flags &= SET_MEMORY_RO | SET_MEMORY_RW;
+	if (!flags)
+		return 0;
+	area = NULL;
+	while (addr < end) {
+		if (!area)
+			area = find_vm_area((void *)addr);
+		if (!area || !(area->flags & VM_ALLOC))
+			return 0;
+		va_start = (unsigned long)area->addr;
+		va_end = va_start + area->nr_pages * PAGE_SIZE;
+		offset = (addr - va_start) >> PAGE_SHIFT;
+		alias = (unsigned long)page_address(area->pages[offset]);
+		rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
+		if (rc)
+			break;
+		addr += PAGE_SIZE;
+		if (addr >= va_end)
+			area = NULL;
+	}
+	return rc;
+}
+
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
+{
+	unsigned long end;
+	int rc;
+
+	if (!cpu_has_nx())
 		flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
 	if (!flags)
 		return 0;
+	if (!numpages)
+		return 0;
 	addr &= PAGE_MASK;
-	return change_page_attr(addr, addr + numpages * PAGE_SIZE, flags);
+	end = addr + numpages * PAGE_SIZE;
+	mutex_lock(&cpa_mutex);
+	rc = change_page_attr(addr, end, flags);
+	if (rc)
+		goto out;
+	rc = change_page_attr_alias(addr, end, flags);
+out:
+	mutex_unlock(&cpa_mutex);
+	return rc;
+}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
+}
+
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
+{
+	unsigned long flags;
+
+	if (valid)
+		flags = SET_MEMORY_DEF;
+	else
+		flags = SET_MEMORY_INV;
+
+	return __set_memory((unsigned long)page_to_virt(page), nr, flags);
+}
+
+bool kernel_page_present(struct page *page)
+{
+	unsigned long addr;
+	unsigned int cc;
+
+	addr = (unsigned long)page_address(page);
+	asm volatile(
+		"	lra	%[addr],0(%[addr])\n"
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [addr] "+a" (addr)
+		:
+		: CC_CLOBBER);
+	return CC_TRANSFORM(cc) == 0;
 }
 
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
new file mode 100644
index 000000000000..e6175d75e4b0
--- /dev/null
+++ b/arch/s390/mm/pfault.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/sched/task.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/pfault.h>
+#include <asm/diag.h>
+
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000UL
+
+/*
+ * 'pfault' pseudo page faults routines.
+ */
+static int pfault_disable;
+
+static int __init nopfault(char *str)
+{
+	pfault_disable = 1;
+	return 1;
+}
+early_param("nopfault", nopfault);
+
+struct pfault_refbk {
+	u16 refdiagc;
+	u16 reffcode;
+	u16 refdwlen;
+	u16 refversn;
+	u64 refgaddr;
+	u64 refselmk;
+	u64 refcmpmk;
+	u64 reserved;
+};
+
+static struct pfault_refbk pfault_init_refbk = {
+	.refdiagc = 0x258,
+	.reffcode = 0,
+	.refdwlen = 5,
+	.refversn = 2,
+	.refgaddr = __LC_LPP,
+	.refselmk = 1UL << 48,
+	.refcmpmk = 1UL << 48,
+	.reserved = __PF_RES_FIELD
+};
+
+int __pfault_init(void)
+{
+	int rc = -EOPNOTSUPP;
+
+	if (pfault_disable)
+		return rc;
+	diag_stat_inc(DIAG_STAT_X258);
+	asm_inline volatile(
+		"	diag	%[refbk],%[rc],0x258\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		: [rc] "+d" (rc)
+		: [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk)
+		: "cc");
+	return rc;
+}
+
+static struct pfault_refbk pfault_fini_refbk = {
+	.refdiagc = 0x258,
+	.reffcode = 1,
+	.refdwlen = 5,
+	.refversn = 2,
+};
+
+void __pfault_fini(void)
+{
+	if (pfault_disable)
+		return;
+	diag_stat_inc(DIAG_STAT_X258);
+	asm_inline volatile(
+		"	diag	%[refbk],0,0x258\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b, 0b)
+		:
+		: [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk)
+		: "cc");
+}
+
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+#define PF_COMPLETE	0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as guest, runs a user
+ * space process and the user space process accesses a page that the host has
+ * paged out we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happen within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule().  It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
+ */
+static void pfault_interrupt(struct ext_code ext_code,
+			     unsigned int param32, unsigned long param64)
+{
+	struct task_struct *tsk;
+	__u16 subcode;
+	pid_t pid;
+
+	/*
+	 * Get the external interruption subcode & pfault initial/completion
+	 * signal bit. VM stores this in the 'cpu address' field associated
+	 * with the external interrupt.
+	 */
+	subcode = ext_code.subcode;
+	if ((subcode & 0xff00) != __SUBCODE_MASK)
+		return;
+	inc_irq_stat(IRQEXT_PFL);
+	/* Get the token (= pid of the affected task). */
+	pid = param64 & LPP_PID_MASK;
+	rcu_read_lock();
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	if (tsk)
+		get_task_struct(tsk);
+	rcu_read_unlock();
+	if (!tsk)
+		return;
+	spin_lock(&pfault_lock);
+	if (subcode & PF_COMPLETE) {
+		/* signal bit is set -> a page has been swapped in by VM */
+		if (tsk->thread.pfault_wait == 1) {
+			/*
+			 * Initial interrupt was faster than the completion
+			 * interrupt. pfault_wait is valid. Set pfault_wait
+			 * back to zero and wake up the process. This can
+			 * safely be done because the task is still sleeping
+			 * and can't produce new pfaults.
+			 */
+			tsk->thread.pfault_wait = 0;
+			list_del(&tsk->thread.list);
+			wake_up_process(tsk);
+			put_task_struct(tsk);
+		} else {
+			/*
+			 * Completion interrupt was faster than initial
+			 * interrupt. Set pfault_wait to -1 so the initial
+			 * interrupt doesn't put the task to sleep.
+			 * If the task is not running, ignore the completion
+			 * interrupt since it must be a leftover of a PFAULT
+			 * CANCEL operation which didn't remove all pending
+			 * completion interrupts.
+			 */
+			if (task_is_running(tsk))
+				tsk->thread.pfault_wait = -1;
+		}
+	} else {
+		/* signal bit not set -> a real page is missing. */
+		if (WARN_ON_ONCE(tsk != current))
+			goto out;
+		if (tsk->thread.pfault_wait == 1) {
+			/* Already on the list with a reference: put to sleep */
+			goto block;
+		} else if (tsk->thread.pfault_wait == -1) {
+			/*
+			 * Completion interrupt was faster than the initial
+			 * interrupt (pfault_wait == -1). Set pfault_wait
+			 * back to zero and exit.
+			 */
+			tsk->thread.pfault_wait = 0;
+		} else {
+			/*
+			 * Initial interrupt arrived before completion
+			 * interrupt. Let the task sleep.
+			 * An extra task reference is needed since a different
+			 * cpu may set the task state to TASK_RUNNING again
+			 * before the scheduler is reached.
+			 */
+			get_task_struct(tsk);
+			tsk->thread.pfault_wait = 1;
+			list_add(&tsk->thread.list, &pfault_list);
+block:
+			/*
+			 * Since this must be a userspace fault, there
+			 * is no kernel task state to trample. Rely on the
+			 * return to userspace schedule() to block.
+			 */
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			set_tsk_need_resched(tsk);
+			set_preempt_need_resched();
+		}
+	}
+out:
+	spin_unlock(&pfault_lock);
+	put_task_struct(tsk);
+}
+
+static int pfault_cpu_dead(unsigned int cpu)
+{
+	struct thread_struct *thread, *next;
+	struct task_struct *tsk;
+
+	spin_lock_irq(&pfault_lock);
+	list_for_each_entry_safe(thread, next, &pfault_list, list) {
+		thread->pfault_wait = 0;
+		list_del(&thread->list);
+		tsk = container_of(thread, struct task_struct, thread);
+		wake_up_process(tsk);
+		put_task_struct(tsk);
+	}
+	spin_unlock_irq(&pfault_lock);
+	return 0;
+}
+
+static int __init pfault_irq_init(void)
+{
+	int rc;
+
+	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+	if (rc)
+		goto out_extint;
+	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+	if (rc)
+		goto out_pfault;
+	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+				  NULL, pfault_cpu_dead);
+	return 0;
+
+out_pfault:
+	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
+	pfault_disable = 1;
+	return rc;
+}
+early_initcall(pfault_irq_init);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 2de48b2c1b04..b449fd2605b0 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -10,70 +10,41 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <asm/mmu_context.h>
+#include <asm/page-states.h>
 #include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-#ifdef CONFIG_PGSTE
-
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static struct ctl_table page_table_sysctl[] = {
-	{
-		.procname	= "allocate_pgste",
-		.data		= &page_table_allocate_pgste,
-		.maxlen		= sizeof(int),
-		.mode		= S_IRUGO | S_IWUSR,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{ }
-};
-
-static struct ctl_table page_table_sysctl_dir[] = {
-	{
-		.procname	= "vm",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= page_table_sysctl,
-	},
-	{ }
-};
-
-static int __init page_table_register_sysctl(void)
-{
-	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#endif /* CONFIG_PGSTE */
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
-	struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+	unsigned long *table;
 
-	if (!page)
+	if (!ptdesc)
 		return NULL;
-	arch_set_page_dat(page, CRST_ALLOC_ORDER);
-	return (unsigned long *) page_to_virt(page);
+	table = ptdesc_to_virt(ptdesc);
+	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
+	return table;
 }
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+	if (!table)
+		return;
+	pagetable_free(virt_to_ptdesc(table));
 }
 
 static void __crst_table_upgrade(void *arg)
 {
 	struct mm_struct *mm = arg;
+	struct ctlreg asce;
 
 	/* change all active ASCEs to avoid the creation of new TLBs */
 	if (current->active_mm == mm) {
-		S390_lowcore.user_asce = mm->context.asce;
-		__ctl_load(S390_lowcore.user_asce, 7, 7);
+		asce.val = mm->context.asce;
+		get_lowcore()->user_asce = asce;
+		local_ctl_load(7, &asce);
+		if (!test_thread_flag(TIF_ASCE_PRIMARY))
+			local_ctl_load(1, &asce);
 	}
 	__tlb_flush_local();
 }
@@ -83,6 +54,8 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 	unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
 	unsigned long asce_limit = mm->context.asce_limit;
 
+	mmap_assert_write_locked(mm);
+
 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
 	VM_BUG_ON(asce_limit < _REGION2_SIZE);
 
@@ -94,23 +67,18 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 		if (unlikely(!p4d))
 			goto err_p4d;
 		crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
+		pagetable_p4d_ctor(virt_to_ptdesc(p4d));
 	}
 	if (end > _REGION1_SIZE) {
 		pgd = crst_table_alloc(mm);
 		if (unlikely(!pgd))
 			goto err_pgd;
 		crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+		pagetable_pgd_ctor(virt_to_ptdesc(pgd));
 	}
 
 	spin_lock_bh(&mm->page_table_lock);
 
-	/*
-	 * This routine gets called with mmap_lock lock held and there is
-	 * no reason to optimize for the case of otherwise. However, if
-	 * that would ever change, the below check will let us know.
-	 */
-	VM_BUG_ON(asce_limit != mm->context.asce_limit);
-
 	if (p4d) {
 		__pgd = (unsigned long *) mm->pgd;
 		p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
@@ -136,292 +104,82 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 	return 0;
 
 err_pgd:
+	pagetable_dtor(virt_to_ptdesc(p4d));
 	crst_table_free(mm, p4d);
 err_p4d:
 	return -ENOMEM;
 }
 
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
-	unsigned int old, new;
-
-	do {
-		old = atomic_read(v);
-		new = old ^ bits;
-	} while (atomic_cmpxchg(v, old, new) != old);
-	return new;
-}
-
 #ifdef CONFIG_PGSTE
 
-struct page *page_table_alloc_pgste(struct mm_struct *mm)
+struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm)
 {
-	struct page *page;
+	struct ptdesc *ptdesc;
 	u64 *table;
 
-	page = alloc_page(GFP_KERNEL);
-	if (page) {
-		table = (u64 *)page_to_virt(page);
+	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+	if (ptdesc) {
+		table = (u64 *)ptdesc_to_virt(ptdesc);
+		__arch_set_page_dat(table, 1);
 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	}
-	return page;
+	return ptdesc;
 }
 
-void page_table_free_pgste(struct page *page)
+void page_table_free_pgste(struct ptdesc *ptdesc)
 {
-	__free_page(page);
+	pagetable_free(ptdesc);
 }
 
 #endif /* CONFIG_PGSTE */
 
-/*
- * A 2KB-pgtable is either upper or lower half of a normal page.
- * The second half of the page may be unused or used as another
- * 2KB-pgtable.
- *
- * Whenever possible the parent page for a new 2KB-pgtable is picked
- * from the list of partially allocated pages mm_context_t::pgtable_list.
- * In case the list is empty a new parent page is allocated and added to
- * the list.
- *
- * When a parent page gets fully allocated it contains 2KB-pgtables in both
- * upper and lower halves and is removed from mm_context_t::pgtable_list.
- *
- * When 2KB-pgtable is freed from to fully allocated parent page that
- * page turns partially allocated and added to mm_context_t::pgtable_list.
- *
- * If 2KB-pgtable is freed from the partially allocated parent page that
- * page turns unused and gets removed from mm_context_t::pgtable_list.
- * Furthermore, the unused parent page is released.
- *
- * As follows from the above, no unallocated or fully allocated parent
- * pages are contained in mm_context_t::pgtable_list.
- *
- * The upper byte (bits 24-31) of the parent page _refcount is used
- * for tracking contained 2KB-pgtables and has the following format:
- *
- *   PP  AA
- * 01234567    upper byte (bits 24-31) of struct page::_refcount
- *   ||  ||
- *   ||  |+--- upper 2KB-pgtable is allocated
- *   ||  +---- lower 2KB-pgtable is allocated
- *   |+------- upper 2KB-pgtable is pending for removal
- *   +-------- lower 2KB-pgtable is pending for removal
- *
- * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
- * using _refcount is possible).
- *
- * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
- * The parent page is either:
- *   - added to mm_context_t::pgtable_list in case the second half of the
- *     parent page is still unallocated;
- *   - removed from mm_context_t::pgtable_list in case both hales of the
- *     parent page are allocated;
- * These operations are protected with mm_context_t::lock.
- *
- * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
- * and the corresponding PP bit is set to 1 in a single atomic operation.
- * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
- * exclusive and may never be both set to 1!
- * The parent page is either:
- *   - added to mm_context_t::pgtable_list in case the second half of the
- *     parent page is still allocated;
- *   - removed from mm_context_t::pgtable_list in case the second half of
- *     the parent page is unallocated;
- * These operations are protected with mm_context_t::lock.
- *
- * It is important to understand that mm_context_t::lock only protects
- * mm_context_t::pgtable_list and AA bits, but not the parent page itself
- * and PP bits.
- *
- * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
- * while both AA bits and the second PP bit are already unset. Then the
- * parent page does not contain any 2KB-pgtable fragment anymore, and it has
- * also been removed from mm_context_t::pgtable_list. It is safe to release
- * the page therefore.
- *
- * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
- * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
- * while the PP bits are never used, nor such a page is added to or removed
- * from mm_context_t::pgtable_list.
- */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
+	struct ptdesc *ptdesc;
 	unsigned long *table;
-	struct page *page;
-	unsigned int mask, bit;
-
-	/* Try to get a fragment of a 4K page as a 2K page table */
-	if (!mm_alloc_pgste(mm)) {
-		table = NULL;
-		spin_lock_bh(&mm->context.lock);
-		if (!list_empty(&mm->context.pgtable_list)) {
-			page = list_first_entry(&mm->context.pgtable_list,
-						struct page, lru);
-			mask = atomic_read(&page->_refcount) >> 24;
-			/*
-			 * The pending removal bits must also be checked.
-			 * Failure to do so might lead to an impossible
-			 * value of (i.e 0x13 or 0x23) written to _refcount.
-			 * Such values violate the assumption that pending and
-			 * allocation bits are mutually exclusive, and the rest
-			 * of the code unrails as result. That could lead to
-			 * a whole bunch of races and corruptions.
-			 */
-			mask = (mask | (mask >> 4)) & 0x03U;
-			if (mask != 0x03U) {
-				table = (unsigned long *) page_to_virt(page);
-				bit = mask & 1;		/* =1 -> second 2K */
-				if (bit)
-					table += PTRS_PER_PTE;
-				atomic_xor_bits(&page->_refcount,
-							0x01U << (bit + 24));
-				list_del(&page->lru);
-			}
-		}
-		spin_unlock_bh(&mm->context.lock);
-		if (table)
-			return table;
-	}
-	/* Allocate a fresh page */
-	page = alloc_page(GFP_KERNEL);
-	if (!page)
+
+	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
+	if (!ptdesc)
 		return NULL;
-	if (!pgtable_pte_page_ctor(page)) {
-		__free_page(page);
+	if (!pagetable_pte_ctor(mm, ptdesc)) {
+		pagetable_free(ptdesc);
 		return NULL;
 	}
-	arch_set_page_dat(page, 0);
-	/* Initialize page table */
-	table = (unsigned long *) page_to_virt(page);
-	if (mm_alloc_pgste(mm)) {
-		/* Return 4K page table with PGSTEs */
-		atomic_xor_bits(&page->_refcount, 0x03U << 24);
-		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
-		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
-	} else {
-		/* Return the first 2K fragment of the page */
-		atomic_xor_bits(&page->_refcount, 0x01U << 24);
-		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
-		spin_lock_bh(&mm->context.lock);
-		list_add(&page->lru, &mm->context.pgtable_list);
-		spin_unlock_bh(&mm->context.lock);
-	}
+	table = ptdesc_to_virt(ptdesc);
+	__arch_set_page_dat(table, 1);
+	memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+	memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	return table;
 }
 
-static void page_table_release_check(struct page *page, void *table,
-				     unsigned int half, unsigned int mask)
+void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	char msg[128];
+	struct ptdesc *ptdesc = virt_to_ptdesc(table);
 
-	if (!IS_ENABLED(CONFIG_DEBUG_VM) || !mask)
-		return;
-	snprintf(msg, sizeof(msg),
-		 "Invalid pgtable %p release half 0x%02x mask 0x%02x",
-		 table, half, mask);
-	dump_page(page, msg);
+	pagetable_dtor_free(ptdesc);
 }
 
-void page_table_free(struct mm_struct *mm, unsigned long *table)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pte_free_now(struct rcu_head *head)
 {
-	unsigned int mask, bit, half;
-	struct page *page;
-
-	page = virt_to_page(table);
-	if (!mm_alloc_pgste(mm)) {
-		/* Free 2K page table fragment of a 4K page */
-		bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
-		spin_lock_bh(&mm->context.lock);
-		/*
-		 * Mark the page for delayed release. The actual release
-		 * will happen outside of the critical section from this
-		 * function or from __tlb_remove_table()
-		 */
-		mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
-		mask >>= 24;
-		if (mask & 0x03U)
-			list_add(&page->lru, &mm->context.pgtable_list);
-		else
-			list_del(&page->lru);
-		spin_unlock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
-		mask >>= 24;
-		if (mask != 0x00U)
-			return;
-		half = 0x01U << bit;
-	} else {
-		half = 0x03U;
-		mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
-		mask >>= 24;
-	}
+	struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
 
-	page_table_release_check(page, table, half, mask);
-	pgtable_pte_page_dtor(page);
-	__free_page(page);
+	pagetable_dtor_free(ptdesc);
 }
 
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
-			 unsigned long vmaddr)
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
 {
-	struct mm_struct *mm;
-	struct page *page;
-	unsigned int bit, mask;
-
-	mm = tlb->mm;
-	page = virt_to_page(table);
-	if (mm_alloc_pgste(mm)) {
-		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) ((unsigned long)table | 0x03U);
-		tlb_remove_table(tlb, table);
-		return;
-	}
-	bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
-	spin_lock_bh(&mm->context.lock);
+	struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
+
+	call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
 	/*
-	 * Mark the page for delayed release. The actual release will happen
-	 * outside of the critical section from __tlb_remove_table() or from
-	 * page_table_free()
+	 * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
+	 * Turn to the generic pte_free_defer() version once gmap is removed.
 	 */
-	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
-	mask >>= 24;
-	if (mask & 0x03U)
-		list_add_tail(&page->lru, &mm->context.pgtable_list);
-	else
-		list_del(&page->lru);
-	spin_unlock_bh(&mm->context.lock);
-	table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
-	tlb_remove_table(tlb, table);
-}
-
-void __tlb_remove_table(void *_table)
-{
-	unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
-	void *table = (void *)((unsigned long) _table ^ mask);
-	struct page *page = virt_to_page(table);
-
-	switch (half) {
-	case 0x00U:	/* pmd, pud, or p4d */
-		free_pages((unsigned long)table, CRST_ALLOC_ORDER);
-		return;
-	case 0x01U:	/* lower 2K of a 4K page table */
-	case 0x02U:	/* higher 2K of a 4K page table */
-		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
-		mask >>= 24;
-		if (mask != 0x00U)
-			return;
-		break;
-	case 0x03U:	/* 4K page table with pgstes */
-		mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
-		mask >>= 24;
-		break;
-	}
-
-	page_table_release_check(page, table, half, mask);
-	pgtable_pte_page_dtor(page);
-	__free_page(page);
+	WARN_ON_ONCE(mm_has_pgste(mm));
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
  * Base infrastructure required to generate basic asces, region, segment,
@@ -448,16 +206,21 @@ static void base_pgt_free(unsigned long *table)
 static unsigned long *base_crst_alloc(unsigned long val)
 {
 	unsigned long *table;
+	struct ptdesc *ptdesc;
 
-	table =	(unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
-	if (table)
-		crst_table_init(table, val);
+	ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+	if (!ptdesc)
+		return NULL;
+	table = ptdesc_address(ptdesc);
+	crst_table_init(table, val);
 	return table;
 }
 
 static void base_crst_free(unsigned long *table)
 {
-	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+	if (!table)
+		return;
+	pagetable_free(virt_to_ptdesc(table));
 }
 
 #define BASE_ADDR_END_FUNC(NAME, SIZE)					\
@@ -469,7 +232,7 @@ static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
 	return (next - 1) < (end - 1) ? next : end;			\
 }
 
-BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
+BASE_ADDR_END_FUNC(page,    PAGE_SIZE)
 BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
 BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
 BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
@@ -493,7 +256,7 @@ static int base_page_walk(unsigned long *origin, unsigned long addr,
 	if (!alloc)
 		return 0;
 	pte = origin;
-	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+	pte += (addr & _PAGE_INDEX) >> PAGE_SHIFT;
 	do {
 		next = base_page_addr_end(addr, end);
 		*pte = base_lra(addr);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4909dcd762e8..7df70cd8f739 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -4,6 +4,7 @@
  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
+#include <linux/cpufeature.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -19,10 +20,10 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
+#include <asm/machine.h>
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
 {
@@ -34,22 +35,12 @@ pgprot_t pgprot_writecombine(pgprot_t prot)
 }
 EXPORT_SYMBOL_GPL(pgprot_writecombine);
 
-pgprot_t pgprot_writethrough(pgprot_t prot)
-{
-	/*
-	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
-	 * once at init and only read afterwards.
-	 */
-	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
-}
-EXPORT_SYMBOL_GPL(pgprot_writethrough);
-
 static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, int nodat)
 {
 	unsigned long opt, asce;
 
-	if (MACHINE_HAS_TLB_GUEST) {
+	if (machine_has_tlb_guest()) {
 		opt = 0;
 		asce = READ_ONCE(mm->context.gmap_asce);
 		if (asce == 0UL || nodat)
@@ -69,7 +60,7 @@ static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
 {
 	unsigned long opt, asce;
 
-	if (MACHINE_HAS_TLB_GUEST) {
+	if (machine_has_tlb_guest()) {
 		opt = 0;
 		asce = READ_ONCE(mm->context.gmap_asce);
 		if (asce == 0UL || nodat)
@@ -94,7 +85,7 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
 	if (unlikely(pte_val(old) & _PAGE_INVALID))
 		return old;
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_TLB_LC &&
+	if (cpu_has_tlb_lc() &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 		ptep_ipte_local(mm, addr, ptep, nodat);
 	else
@@ -125,32 +116,23 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
 
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
-	unsigned long new = 0;
+	unsigned long value = 0;
 #ifdef CONFIG_PGSTE
-	unsigned long old;
-
-	asm(
-		"	lg	%0,%2\n"
-		"0:	lgr	%1,%0\n"
-		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
-		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
-		"	csg	%0,%1,%2\n"
-		"	jl	0b\n"
-		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
-		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+	do {
+		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
+	} while (value & PGSTE_PCL_BIT);
+	value |= PGSTE_PCL_BIT;
 #endif
-	return __pgste(new);
+	return __pgste(value);
 }
 
 static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	asm(
-		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
-		"	stg	%1,%0\n"
-		: "=Q" (ptep[PTRS_PER_PTE])
-		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
-		: "cc", "memory");
+	barrier();
+	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
 #endif
 }
 
@@ -182,10 +164,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
 	skey = (unsigned long) page_get_storage_key(address);
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
 	/* Transfer page changed & referenced bit to guest bits in pgste */
-	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
+	pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
 	/* Copy page access key and fetch protection bit to pgste */
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
-	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
 #endif
 	return pgste;
 
@@ -219,7 +201,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 	if ((pte_val(entry) & _PAGE_PRESENT) &&
 	    (pte_val(entry) & _PAGE_WRITE) &&
 	    !(pte_val(entry) & _PAGE_INVALID)) {
-		if (!MACHINE_HAS_ESOP) {
+		if (!machine_has_esop()) {
 			/*
 			 * Without enhanced suppression-on-protection force
 			 * the dirty bit on for all writable ptes.
@@ -229,7 +211,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 		}
 		if (!(pte_val(entry) & _PAGE_PROTECT))
 			/* This pte allows write access, set user-dirty */
-			pgste_val(pgste) |= PGSTE_UC_BIT;
+			pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
 	}
 #endif
 	set_pte(ptep, entry);
@@ -245,7 +227,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
 
 	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
 	if (bits) {
-		pgste_val(pgste) ^= bits;
+		pgste = __pgste(pgste_val(pgste) ^ bits);
 		ptep_notify(mm, addr, ptep, bits);
 	}
 #endif
@@ -302,6 +284,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(ptep_xchg_direct);
 
+/*
+ * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
+ * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
+ */
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+			 pte_t new)
+{
+	preempt_disable();
+	atomic_inc(&mm->context.flush_count);
+	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__ptep_rdp(addr, ptep, 0, 0, 1);
+	else
+		__ptep_rdp(addr, ptep, 0, 0, 0);
+	/*
+	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
+	 * means it is still valid and active, and must not be changed according
+	 * to the architecture. But writing a new value that only differs in SW
+	 * bits is allowed.
+	 */
+	set_pte(ptep, new);
+	atomic_dec(&mm->context.flush_count);
+	preempt_enable();
+}
+EXPORT_SYMBOL(ptep_reset_dat_prot);
+
 pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t new)
 {
@@ -344,8 +351,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 	pgste_t pgste;
 	struct mm_struct *mm = vma->vm_mm;
 
-	if (!MACHINE_HAS_NX)
-		pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get(ptep);
 		pgste_set_key(ptep, pgste, pte, mm);
@@ -360,7 +365,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 static inline void pmdp_idte_local(struct mm_struct *mm,
 				   unsigned long addr, pmd_t *pmdp)
 {
-	if (MACHINE_HAS_TLB_GUEST)
+	if (machine_has_tlb_guest())
 		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
 			    mm->context.asce, IDTE_LOCAL);
 	else
@@ -372,12 +377,12 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
 static inline void pmdp_idte_global(struct mm_struct *mm,
 				    unsigned long addr, pmd_t *pmdp)
 {
-	if (MACHINE_HAS_TLB_GUEST) {
+	if (machine_has_tlb_guest()) {
 		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
 			    mm->context.asce, IDTE_GLOBAL);
 		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
 			gmap_pmdp_idte_global(mm, addr);
-	} else if (MACHINE_HAS_IDTE) {
+	} else if (cpu_has_idte()) {
 		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
 		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
 			gmap_pmdp_idte_global(mm, addr);
@@ -397,7 +402,7 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
 		return old;
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_TLB_LC &&
+	if (cpu_has_tlb_lc() &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 		pmdp_idte_local(mm, addr, pmdp);
 	else
@@ -454,7 +459,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
 		return -ENOENT;
 
 	/* Large PUDs are not supported yet. */
-	if (pud_large(*pud))
+	if (pud_leaf(*pud))
 		return -EFAULT;
 
 	*pmdp = pmd_offset(pud, addr);
@@ -491,7 +496,7 @@ EXPORT_SYMBOL(pmdp_xchg_lazy);
 static inline void pudp_idte_local(struct mm_struct *mm,
 				   unsigned long addr, pud_t *pudp)
 {
-	if (MACHINE_HAS_TLB_GUEST)
+	if (machine_has_tlb_guest())
 		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
 			    mm->context.asce, IDTE_LOCAL);
 	else
@@ -501,15 +506,15 @@ static inline void pudp_idte_local(struct mm_struct *mm,
 static inline void pudp_idte_global(struct mm_struct *mm,
 				    unsigned long addr, pud_t *pudp)
 {
-	if (MACHINE_HAS_TLB_GUEST)
+	if (machine_has_tlb_guest())
 		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
 			    mm->context.asce, IDTE_GLOBAL);
-	else if (MACHINE_HAS_IDTE)
+	else if (cpu_has_idte())
 		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
 	else
 		/*
 		 * Invalid bit position is the same for pmd and pud, so we can
-		 * re-use _pmd_csp() here
+		 * reuse __pmdp_csp() here
 		 */
 		__pmdp_csp((pmd_t *) pudp);
 }
@@ -523,7 +528,7 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm,
 	if (pud_val(old) & _REGION_ENTRY_INVALID)
 		return old;
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_TLB_LC &&
+	if (cpu_has_tlb_lc() &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
 		pudp_idte_local(mm, addr, pudp);
 	else
@@ -595,7 +600,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	/* the mm_has_pgste() check is done in set_pte_at() */
 	preempt_disable();
 	pgste = pgste_get_lock(ptep);
-	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+	pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
 	pgste_set_key(ptep, pgste, entry, mm);
 	pgste = pgste_set_pte(ptep, pgste, entry);
 	pgste_set_unlock(ptep, pgste);
@@ -608,7 +613,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 	preempt_disable();
 	pgste = pgste_get_lock(ptep);
-	pgste_val(pgste) |= PGSTE_IN_BIT;
+	pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
 	pgste_set_unlock(ptep, pgste);
 	preempt_enable();
 }
@@ -653,7 +658,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
 		entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
 		entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
 	}
-	pgste_val(pgste) |= bit;
+	pgste = set_pgste_bit(pgste, bit);
 	pgste = pgste_set_pte(ptep, pgste, entry);
 	pgste_set_unlock(ptep, pgste);
 	return 0;
@@ -673,7 +678,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
 	if (!(pte_val(spte) & _PAGE_INVALID) &&
 	    !((pte_val(spte) & _PAGE_PROTECT) &&
 	      !(pte_val(pte) & _PAGE_PROTECT))) {
-		pgste_val(spgste) |= PGSTE_VSIE_BIT;
+		spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
 		tpgste = pgste_get_lock(tptep);
 		tpte = __pte((pte_val(spte) & PAGE_MASK) |
 			     (pte_val(pte) & _PAGE_PROTECT));
@@ -705,9 +710,9 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
 	if (!non_swap_entry(entry))
 		dec_mm_counter(mm, MM_SWAPENTS);
 	else if (is_migration_entry(entry)) {
-		struct page *page = pfn_swap_entry_to_page(entry);
+		struct folio *folio = pfn_swap_entry_folio(entry);
 
-		dec_mm_counter(mm, mm_counter(page));
+		dec_mm_counter(mm, mm_counter(folio));
 	}
 	free_swap_and_cache(entry);
 }
@@ -731,7 +736,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 		pte_clear(mm, addr, ptep);
 	}
 	if (reset)
-		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
+		pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
 	pgste_set_unlock(ptep, pgste);
 	preempt_enable();
 }
@@ -744,8 +749,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 	/* Clear storage key ACC and F, but set R/C */
 	preempt_disable();
 	pgste = pgste_get_lock(ptep);
-	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
-	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
+	pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
 	ptev = pte_val(*ptep);
 	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
 		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
@@ -766,13 +771,13 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
 
 	pgste = pgste_get_lock(ptep);
 	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
-	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
 		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
 		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
 		ptep_ipte_global(mm, addr, ptep, nodat);
-		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+		if (machine_has_esop() || !(pte_val(pte) & _PAGE_WRITE))
 			pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
 		else
 			pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
@@ -804,14 +809,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	default:
 		return -EFAULT;
 	}
-
+again:
 	ptl = pmd_lock(mm, pmdp);
 	if (!pmd_present(*pmdp)) {
 		spin_unlock(ptl);
 		return key ? -EFAULT : 0;
 	}
 
-	if (pmd_large(*pmdp)) {
+	if (pmd_leaf(*pmdp)) {
 		paddr = pmd_val(*pmdp) & HPAGE_MASK;
 		paddr |= addr & ~HPAGE_MASK;
 		/*
@@ -825,12 +830,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	spin_unlock(ptl);
 
 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!ptep)
+		goto again;
 	new = old = pgste_get_lock(ptep);
-	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
-			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
+				   PGSTE_ACC_BITS | PGSTE_FP_BIT);
 	keyul = (unsigned long) key;
-	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
-	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
+	new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 		unsigned long bits, skey;
 
@@ -841,12 +848,12 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 		/* Set storage key ACC and FP */
 		page_set_storage_key(paddr, skey, !nq);
 		/* Merge host changed & referenced into pgste  */
-		pgste_val(new) |= bits << 52;
+		new = set_pgste_bit(new, bits << 52);
 	}
 	/* changing the guest storage key is considered a change of the page */
 	if ((pgste_val(new) ^ pgste_val(old)) &
 	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
-		pgste_val(new) |= PGSTE_UC_BIT;
+		new = set_pgste_bit(new, PGSTE_UC_BIT);
 
 	pgste_set_unlock(ptep, new);
 	pte_unmap_unlock(ptep, ptl);
@@ -913,14 +920,14 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 	default:
 		return -EFAULT;
 	}
-
+again:
 	ptl = pmd_lock(mm, pmdp);
 	if (!pmd_present(*pmdp)) {
 		spin_unlock(ptl);
 		return 0;
 	}
 
-	if (pmd_large(*pmdp)) {
+	if (pmd_leaf(*pmdp)) {
 		paddr = pmd_val(*pmdp) & HPAGE_MASK;
 		paddr |= addr & ~HPAGE_MASK;
 		cc = page_reset_referenced(paddr);
@@ -930,21 +937,23 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 	spin_unlock(ptl);
 
 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!ptep)
+		goto again;
 	new = old = pgste_get_lock(ptep);
 	/* Reset guest reference bit only */
-	pgste_val(new) &= ~PGSTE_GR_BIT;
+	new = clear_pgste_bit(new, PGSTE_GR_BIT);
 
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
 		paddr = pte_val(*ptep) & PAGE_MASK;
 		cc = page_reset_referenced(paddr);
 		/* Merge real referenced bit into host-set */
-		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
+		new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
 	}
 	/* Reflect guest's logical view, not physical */
 	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
 	/* Changing the guest storage key is considered a change of the page */
 	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
-		pgste_val(new) |= PGSTE_UC_BIT;
+		new = set_pgste_bit(new, PGSTE_UC_BIT);
 
 	pgste_set_unlock(ptep, new);
 	pte_unmap_unlock(ptep, ptl);
@@ -975,14 +984,14 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	default:
 		return -EFAULT;
 	}
-
+again:
 	ptl = pmd_lock(mm, pmdp);
 	if (!pmd_present(*pmdp)) {
 		spin_unlock(ptl);
 		return 0;
 	}
 
-	if (pmd_large(*pmdp)) {
+	if (pmd_leaf(*pmdp)) {
 		paddr = pmd_val(*pmdp) & HPAGE_MASK;
 		paddr |= addr & ~HPAGE_MASK;
 		*key = page_get_storage_key(paddr);
@@ -992,6 +1001,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	spin_unlock(ptl);
 
 	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+	if (!ptep)
+		goto again;
 	pgste = pgste_get_lock(ptep);
 	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
 	paddr = pte_val(*ptep) & PAGE_MASK;
@@ -1106,7 +1117,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 	if (res)
 		pgstev |= _PGSTE_GPS_ZERO;
 
-	pgste_val(pgste) = pgstev;
+	pgste = __pgste(pgstev);
 	pgste_set_unlock(ptep, pgste);
 	pte_unmap_unlock(ptep, ptl);
 	return res;
@@ -1139,8 +1150,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
 		return -EFAULT;
 	new = pgste_get_lock(ptep);
 
-	pgste_val(new) &= ~bits;
-	pgste_val(new) |= value & bits;
+	new = clear_pgste_bit(new, bits);
+	new = set_pgste_bit(new, value & bits);
 
 	pgste_set_unlock(ptep, new);
 	pte_unmap_unlock(ptep, ptl);
diff --git a/arch/s390/mm/physaddr.c b/arch/s390/mm/physaddr.c
new file mode 100644
index 000000000000..59de866c72d9
--- /dev/null
+++ b/arch/s390/mm/physaddr.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mmdebug.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+	VIRTUAL_BUG_ON(is_vmalloc_or_module_addr((void *)(x)));
+	x = __pa_nodebug(x);
+	if (is_31bit)
+		VIRTUAL_BUG_ON(x >> 31);
+	return x;
+}
+EXPORT_SYMBOL(__phys_addr);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c2583f921ca8..448dd6ed1069 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/memory_hotplug.h>
+#include <linux/cpufeature.h>
 #include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
@@ -11,13 +12,19 @@
 #include <linux/list.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
+#include <asm/page-states.h>
+#include <asm/abs_lowcore.h>
 #include <asm/cacheflush.h>
+#include <asm/maccess.h>
 #include <asm/nospec-branch.h>
+#include <asm/ctlreg.h>
 #include <asm/pgalloc.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/set_memory.h>
+#include <asm/physmem_info.h>
 
 static DEFINE_MUTEX(vmem_mutex);
 
@@ -30,11 +37,15 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 	return memblock_alloc(size, size);
 }
 
-static void vmem_free_pages(unsigned long addr, int order)
+static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap)
 {
+	if (altmap) {
+		vmem_altmap_free(altmap, 1 << order);
+		return;
+	}
 	/* We don't expect boot memory to be removed ever. */
 	if (!slab_is_available() ||
-	    WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
+	    WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
 		return;
 	free_pages(addr, order);
 }
@@ -44,8 +55,10 @@ void *vmem_crst_alloc(unsigned long val)
 	unsigned long *table;
 
 	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
-	if (table)
-		crst_table_init(table, val);
+	if (!table)
+		return NULL;
+	crst_table_init(table, val);
+	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
 	return table;
 }
 
@@ -61,6 +74,7 @@ pte_t __ref *vmem_pte_alloc(void)
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+	__arch_set_page_dat(pte, 1);
 	return pte;
 }
 
@@ -150,27 +164,25 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
 
 /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
 static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
-				  unsigned long end, bool add, bool direct)
+				  unsigned long end, bool add, bool direct,
+				  struct vmem_altmap *altmap)
 {
 	unsigned long prot, pages = 0;
 	int ret = -ENOMEM;
 	pte_t *pte;
 
 	prot = pgprot_val(PAGE_KERNEL);
-	if (!MACHINE_HAS_NX)
-		prot &= ~_PAGE_NOEXEC;
-
 	pte = pte_offset_kernel(pmd, addr);
 	for (; addr < end; addr += PAGE_SIZE, pte++) {
 		if (!add) {
 			if (pte_none(*pte))
 				continue;
 			if (!direct)
-				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
+				vmem_free_pages((unsigned long)pfn_to_virt(pte_pfn(*pte)), get_order(PAGE_SIZE), altmap);
 			pte_clear(&init_mm, addr, pte);
 		} else if (pte_none(*pte)) {
 			if (!direct) {
-				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+				void *new_page = vmemmap_alloc_block_buf(PAGE_SIZE, NUMA_NO_NODE, altmap);
 
 				if (!new_page)
 					goto out;
@@ -207,7 +219,8 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start)
 
 /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
 static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
-				  unsigned long end, bool add, bool direct)
+				  unsigned long end, bool add, bool direct,
+				  struct vmem_altmap *altmap)
 {
 	unsigned long next, prot, pages = 0;
 	int ret = -ENOMEM;
@@ -215,24 +228,21 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
 	pte_t *pte;
 
 	prot = pgprot_val(SEGMENT_KERNEL);
-	if (!MACHINE_HAS_NX)
-		prot &= ~_SEGMENT_ENTRY_NOEXEC;
-
 	pmd = pmd_offset(pud, addr);
 	for (; addr < end; addr = next, pmd++) {
 		next = pmd_addr_end(addr, end);
 		if (!add) {
 			if (pmd_none(*pmd))
 				continue;
-			if (pmd_large(*pmd)) {
+			if (pmd_leaf(*pmd)) {
 				if (IS_ALIGNED(addr, PMD_SIZE) &&
 				    IS_ALIGNED(next, PMD_SIZE)) {
 					if (!direct)
-						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
 					pmd_clear(pmd);
 					pages++;
 				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
-					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
 					pmd_clear(pmd);
 				}
 				continue;
@@ -240,12 +250,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
 		} else if (pmd_none(*pmd)) {
 			if (IS_ALIGNED(addr, PMD_SIZE) &&
 			    IS_ALIGNED(next, PMD_SIZE) &&
-			    MACHINE_HAS_EDAT1 && addr && direct &&
+			    cpu_has_edat1() && direct &&
 			    !debug_pagealloc_enabled()) {
 				set_pmd(pmd, __pmd(__pa(addr) | prot));
 				pages++;
 				continue;
-			} else if (!direct && MACHINE_HAS_EDAT1) {
+			} else if (!direct && cpu_has_edat1()) {
 				void *new_page;
 
 				/*
@@ -255,7 +265,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
 				 * page tables since vmemmap_populate gets
 				 * called for each section separately.
 				 */
-				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+				new_page = vmemmap_alloc_block_buf(PMD_SIZE, NUMA_NO_NODE, altmap);
 				if (new_page) {
 					set_pmd(pmd, __pmd(__pa(new_page) | prot));
 					if (!IS_ALIGNED(addr, PMD_SIZE) ||
@@ -269,12 +279,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
 			if (!pte)
 				goto out;
 			pmd_populate(&init_mm, pmd, pte);
-		} else if (pmd_large(*pmd)) {
+		} else if (pmd_leaf(*pmd)) {
 			if (!direct)
 				vmemmap_use_sub_pmd(addr, next);
 			continue;
 		}
-		ret = modify_pte_table(pmd, addr, next, add, direct);
+		ret = modify_pte_table(pmd, addr, next, add, direct, altmap);
 		if (ret)
 			goto out;
 		if (!add)
@@ -289,27 +299,19 @@ out:
 
 static void try_free_pmd_table(pud_t *pud, unsigned long start)
 {
-	const unsigned long end = start + PUD_SIZE;
 	pmd_t *pmd;
 	int i;
 
-	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-	if (end > VMALLOC_START)
-		return;
-#ifdef CONFIG_KASAN
-	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
-		return;
-#endif
 	pmd = pmd_offset(pud, start);
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
 		if (!pmd_none(*pmd))
 			return;
-	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER, NULL);
 	pud_clear(pud);
 }
 
 static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
-			    bool add, bool direct)
+			    bool add, bool direct, struct vmem_altmap *altmap)
 {
 	unsigned long next, prot, pages = 0;
 	int ret = -ENOMEM;
@@ -317,15 +319,13 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 	pmd_t *pmd;
 
 	prot = pgprot_val(REGION3_KERNEL);
-	if (!MACHINE_HAS_NX)
-		prot &= ~_REGION_ENTRY_NOEXEC;
 	pud = pud_offset(p4d, addr);
 	for (; addr < end; addr = next, pud++) {
 		next = pud_addr_end(addr, end);
 		if (!add) {
 			if (pud_none(*pud))
 				continue;
-			if (pud_large(*pud)) {
+			if (pud_leaf(*pud)) {
 				if (IS_ALIGNED(addr, PUD_SIZE) &&
 				    IS_ALIGNED(next, PUD_SIZE)) {
 					pud_clear(pud);
@@ -336,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 		} else if (pud_none(*pud)) {
 			if (IS_ALIGNED(addr, PUD_SIZE) &&
 			    IS_ALIGNED(next, PUD_SIZE) &&
-			    MACHINE_HAS_EDAT2 && addr && direct &&
+			    cpu_has_edat2() && direct &&
 			    !debug_pagealloc_enabled()) {
 				set_pud(pud, __pud(__pa(addr) | prot));
 				pages++;
@@ -346,10 +346,10 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 			if (!pmd)
 				goto out;
 			pud_populate(&init_mm, pud, pmd);
-		} else if (pud_large(*pud)) {
+		} else if (pud_leaf(*pud)) {
 			continue;
 		}
-		ret = modify_pmd_table(pud, addr, next, add, direct);
+		ret = modify_pmd_table(pud, addr, next, add, direct, altmap);
 		if (ret)
 			goto out;
 		if (!add)
@@ -364,29 +364,20 @@ out:
 
 static void try_free_pud_table(p4d_t *p4d, unsigned long start)
 {
-	const unsigned long end = start + P4D_SIZE;
 	pud_t *pud;
 	int i;
 
-	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-	if (end > VMALLOC_START)
-		return;
-#ifdef CONFIG_KASAN
-	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
-		return;
-#endif
-
 	pud = pud_offset(p4d, start);
 	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 		if (!pud_none(*pud))
 			return;
 	}
-	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER, NULL);
 	p4d_clear(p4d);
 }
 
 static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
-			    bool add, bool direct)
+			    bool add, bool direct, struct vmem_altmap *altmap)
 {
 	unsigned long next;
 	int ret = -ENOMEM;
@@ -405,7 +396,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
 				goto out;
 			p4d_populate(&init_mm, p4d, pud);
 		}
-		ret = modify_pud_table(p4d, addr, next, add, direct);
+		ret = modify_pud_table(p4d, addr, next, add, direct, altmap);
 		if (ret)
 			goto out;
 		if (!add)
@@ -418,29 +409,20 @@ out:
 
 static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
 {
-	const unsigned long end = start + PGDIR_SIZE;
 	p4d_t *p4d;
 	int i;
 
-	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-	if (end > VMALLOC_START)
-		return;
-#ifdef CONFIG_KASAN
-	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
-		return;
-#endif
-
 	p4d = p4d_offset(pgd, start);
 	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
 		if (!p4d_none(*p4d))
 			return;
 	}
-	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER, NULL);
 	pgd_clear(pgd);
 }
 
 static int modify_pagetable(unsigned long start, unsigned long end, bool add,
-			    bool direct)
+			    bool direct, struct vmem_altmap *altmap)
 {
 	unsigned long addr, next;
 	int ret = -ENOMEM;
@@ -449,6 +431,9 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
 
 	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
 		return -EINVAL;
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (WARN_ON_ONCE(end > __abs_lowcore))
+		return -EINVAL;
 	for (addr = start; addr < end; addr = next) {
 		next = pgd_addr_end(addr, end);
 		pgd = pgd_offset_k(addr);
@@ -462,7 +447,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
 				goto out;
 			pgd_populate(&init_mm, pgd, p4d);
 		}
-		ret = modify_p4d_table(pgd, addr, next, add, direct);
+		ret = modify_p4d_table(pgd, addr, next, add, direct, altmap);
 		if (ret)
 			goto out;
 		if (!add)
@@ -475,14 +460,16 @@ out:
 	return ret;
 }
 
-static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+static int add_pagetable(unsigned long start, unsigned long end, bool direct,
+			 struct vmem_altmap *altmap)
 {
-	return modify_pagetable(start, end, true, direct);
+	return modify_pagetable(start, end, true, direct, altmap);
 }
 
-static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct,
+			    struct vmem_altmap *altmap)
 {
-	return modify_pagetable(start, end, false, direct);
+	return modify_pagetable(start, end, false, direct, altmap);
 }
 
 /*
@@ -490,7 +477,8 @@ static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
  */
 static int vmem_add_range(unsigned long start, unsigned long size)
 {
-	return add_pagetable(start, start + size, true);
+	start = (unsigned long)__va(start);
+	return add_pagetable(start, start + size, true, NULL);
 }
 
 /*
@@ -498,7 +486,8 @@ static int vmem_add_range(unsigned long start, unsigned long size)
  */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-	remove_pagetable(start, start + size, true);
+	start = (unsigned long)__va(start);
+	remove_pagetable(start, start + size, true, NULL);
 }
 
 /*
@@ -511,21 +500,25 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 
 	mutex_lock(&vmem_mutex);
 	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
-	ret = add_pagetable(start, end, false);
+	ret = add_pagetable(start, end, false, altmap);
 	if (ret)
-		remove_pagetable(start, end, false);
+		remove_pagetable(start, end, false, altmap);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+
 void vmemmap_free(unsigned long start, unsigned long end,
 		  struct vmem_altmap *altmap)
 {
 	mutex_lock(&vmem_mutex);
-	remove_pagetable(start, end, false);
+	remove_pagetable(start, end, false, altmap);
 	mutex_unlock(&vmem_mutex);
 }
 
+#endif
+
 void vmem_remove_mapping(unsigned long start, unsigned long size)
 {
 	mutex_lock(&vmem_mutex);
@@ -538,7 +531,7 @@ struct range arch_get_mappable_range(void)
 	struct range mhp_range;
 
 	mhp_range.start = 0;
-	mhp_range.end =  VMEM_MAX_PHYS - 1;
+	mhp_range.end = max_mappable - 1;
 	return mhp_range;
 }
 
@@ -561,33 +554,116 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
 }
 
 /*
- * map whole physical memory to virtual memory (identity mapping)
- * we reserve enough space in the vmalloc area for vmemmap to hotplug
- * additional memory segments.
+ * Allocate new or return existing page-table entry, but do not map it
+ * to any physical address. If missing, allocate segment- and region-
+ * table entries along the way. Meeting a large segment- or region-table
+ * entry while traversing is an error, since the function is expected to
+ * be called against virtual regions reserved for 4KB mappings only.
  */
+pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+{
+	pte_t *ptep = NULL;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd)) {
+		if (!alloc)
+			goto out;
+		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+		if (!p4d)
+			goto out;
+		pgd_populate(&init_mm, pgd, p4d);
+	}
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		if (!alloc)
+			goto out;
+		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+		if (!pud)
+			goto out;
+		p4d_populate(&init_mm, p4d, pud);
+	}
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		if (!alloc)
+			goto out;
+		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+		if (!pmd)
+			goto out;
+		pud_populate(&init_mm, pud, pmd);
+	} else if (WARN_ON_ONCE(pud_leaf(*pud))) {
+		goto out;
+	}
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd)) {
+		if (!alloc)
+			goto out;
+		pte = vmem_pte_alloc();
+		if (!pte)
+			goto out;
+		pmd_populate(&init_mm, pmd, pte);
+	} else if (WARN_ON_ONCE(pmd_leaf(*pmd))) {
+		goto out;
+	}
+	ptep = pte_offset_kernel(pmd, addr);
+out:
+	return ptep;
+}
+
+int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
+{
+	pte_t *ptep, pte;
+
+	if (!IS_ALIGNED(addr, PAGE_SIZE))
+		return -EINVAL;
+	ptep = vmem_get_alloc_pte(addr, alloc);
+	if (!ptep)
+		return -ENOMEM;
+	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	pte = mk_pte_phys(phys, prot);
+	set_pte(ptep, pte);
+	return 0;
+}
+
+int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+	int rc;
+
+	mutex_lock(&vmem_mutex);
+	rc = __vmem_map_4k_page(addr, phys, prot, true);
+	mutex_unlock(&vmem_mutex);
+	return rc;
+}
+
+void vmem_unmap_4k_page(unsigned long addr)
+{
+	pte_t *ptep;
+
+	mutex_lock(&vmem_mutex);
+	ptep = virt_to_kpte(addr);
+	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	pte_clear(&init_mm, addr, ptep);
+	mutex_unlock(&vmem_mutex);
+}
+
 void __init vmem_map_init(void)
 {
-	phys_addr_t base, end;
-	u64 i;
-
-	for_each_mem_range(i, &base, &end)
-		vmem_add_range(base, end - base);
-	__set_memory((unsigned long)_stext,
-		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
-		     SET_MEMORY_RO | SET_MEMORY_X);
-	__set_memory((unsigned long)_etext,
-		     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
-		     SET_MEMORY_RO);
-	__set_memory((unsigned long)_sinittext,
-		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
-		     SET_MEMORY_RO | SET_MEMORY_X);
-	__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
-		     SET_MEMORY_RO | SET_MEMORY_X);
-
-	/* lowcore must be executable for LPSWE */
-	if (!static_key_enabled(&cpu_has_bear))
+	__set_memory_rox(_stext, _etext);
+	__set_memory_ro(_etext, __end_rodata);
+	__set_memory_rox(__stext_amode31, __etext_amode31);
+	/*
+	 * If the BEAR-enhancement facility is not installed, the first
+	 * prefix page is used to return to the previous context with
+	 * an LPSWE instruction and therefore must be executable.
+	 */
+	if (!cpu_has_bear())
 		set_memory_x(0, 1);
-
+	if (debug_pagealloc_enabled())
+		__set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size);
 	pr_info("Write protected kernel read-only data: %luk\n",
 		(unsigned long)(__end_rodata - _stext) >> 10);
 }
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index af35052d06ed..c7f8313ba449 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -30,11 +30,13 @@
 #include <asm/facility.h>
 #include <asm/nospec-branch.h>
 #include <asm/set_memory.h>
+#include <asm/text-patching.h>
+#include <asm/unwind.h>
 #include "bpf_jit.h"
 
 struct bpf_jit {
 	u32 seen;		/* Flags to remember seen eBPF instructions */
-	u32 seen_reg[16];	/* Array to remember which registers are used */
+	u16 seen_regs;		/* Mask to remember which registers are used */
 	u32 *addrs;		/* Array with relative instruction addresses */
 	u8 *prg_buf;		/* Start of program */
 	int size;		/* Size of program and literal pool */
@@ -46,18 +48,21 @@ struct bpf_jit {
 	int lit64;		/* Current position in 64-bit literal pool */
 	int base_ip;		/* Base address for literal pool */
 	int exit_ip;		/* Address of exit */
-	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
-	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
 	int tail_call_start;	/* Tail call start offset */
 	int excnt;		/* Number of exception table entries */
+	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
+	int prologue_plt;	/* Start of prologue hotpatch PLT */
+	int kern_arena;		/* Pool offset of kernel arena address */
+	u64 user_arena;		/* User arena address */
 };
 
 #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
 #define SEEN_LITERAL	BIT(1)		/* code uses literals */
 #define SEEN_FUNC	BIT(2)		/* calls C functions */
-#define SEEN_TAIL_CALL	BIT(3)		/* code uses tail calls */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
 
+#define NVREGS		0xffc0		/* %r6-%r15 */
+
 /*
  * s390 registers
  */
@@ -68,6 +73,10 @@ struct bpf_jit {
 #define REG_0		REG_W0			/* Register 0 */
 #define REG_1		REG_W1			/* Register 1 */
 #define REG_2		BPF_REG_1		/* Register 2 */
+#define REG_3		BPF_REG_2		/* Register 3 */
+#define REG_4		BPF_REG_3		/* Register 4 */
+#define REG_7		BPF_REG_6		/* Register 7 */
+#define REG_8		BPF_REG_7		/* Register 8 */
 #define REG_14		BPF_REG_0		/* Register 14 */
 
 /*
@@ -112,8 +121,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 {
 	u32 r1 = reg2hex[b1];
 
-	if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
-		jit->seen_reg[r1] = 1;
+	if (r1 >= 6 && r1 <= 15)
+		jit->seen_regs |= (1 << r1);
+}
+
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+	return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+	if (jit->prg_buf)
+		return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+	return 0;
 }
 
 #define REG_SET_SEEN(b1)					\
@@ -121,8 +142,6 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 	reg_set_seen(jit, b1);					\
 })
 
-#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
-
 /*
  * EMIT macros for code generation
  */
@@ -192,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT4_PCREL_RIC(op, mask, target)			\
 ({								\
-	int __rel = ((target) - jit->prg) / 2;			\
+	int __rel = off_to_pcrel(jit, target) / 2;		\
 	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
 })
 
@@ -230,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
 ({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
 	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
 	       (op2) | (mask) << 12);				\
 	REG_SET_SEEN(b1);					\
@@ -239,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
 ({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	unsigned int rel = off_to_pcrel(jit, target) / 2;	\
 	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
 		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
 	REG_SET_SEEN(b1);					\
@@ -248,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
 
 #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
 ({								\
-	int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2;	\
+	int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
 	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+	u32 pc32dbl = (s32)(pcrel / 2);
+
+	_EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+	emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
+	REG_SET_SEEN(b);
+}
+
 #define EMIT6_PCREL_RILB(op, b, target)				\
-({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
-	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
-	REG_SET_SEEN(b);					\
-})
+	emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
 
-#define EMIT6_PCREL_RIL(op, target)				\
-({								\
-	unsigned int rel = (int)((target) - jit->prg) / 2;	\
-	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr)			\
+	emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+	emit6_pcrel_ril(jit, op | mask << 20, pcrel);
+}
 
 #define EMIT6_PCREL_RILC(op, mask, target)			\
-({								\
-	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
-})
+	emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr)		\
+	emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
 
 #define _EMIT6_IMM(op, imm)					\
 ({								\
@@ -430,12 +461,12 @@ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
 /*
  * Return first seen register (from start)
  */
-static int get_start(struct bpf_jit *jit, int start)
+static int get_start(u16 seen_regs, int start)
 {
 	int i;
 
 	for (i = start; i <= 15; i++) {
-		if (jit->seen_reg[i])
+		if (seen_regs & (1 << i))
 			return i;
 	}
 	return 0;
@@ -444,15 +475,15 @@ static int get_start(struct bpf_jit *jit, int start)
 /*
  * Return last seen register (from start) (gap >= 2)
  */
-static int get_end(struct bpf_jit *jit, int start)
+static int get_end(u16 seen_regs, int start)
 {
 	int i;
 
 	for (i = start; i < 15; i++) {
-		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+		if (!(seen_regs & (3 << i)))
 			return i - 1;
 	}
-	return jit->seen_reg[15] ? 15 : 14;
+	return (seen_regs & (1 << 15)) ? 15 : 14;
 }
 
 #define REGS_SAVE	1
@@ -461,8 +492,10 @@ static int get_end(struct bpf_jit *jit, int start)
  * Save and restore clobbered registers (6-15) on stack.
  * We save/restore registers in chunks with gap >= 2 registers.
  */
-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
+static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
+			      u16 extra_regs)
 {
+	u16 seen_regs = jit->seen_regs | extra_regs;
 	const int last = 15, save_restore_size = 6;
 	int re = 6, rs;
 
@@ -476,10 +509,10 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 	}
 
 	do {
-		rs = get_start(jit, re);
+		rs = get_start(seen_regs, re);
 		if (!rs)
 			break;
-		re = get_end(jit, rs + 1);
+		re = get_end(seen_regs, rs + 1);
 		if (op == REGS_SAVE)
 			save_regs(jit, rs, re);
 		else
@@ -492,7 +525,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
 {
 	if (size >= 6 && !is_valid_rel(size)) {
 		/* brcl 0xf,size */
-		EMIT6_PCREL_RIL(0xc0f4000000, size);
+		EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
 		size -= 6;
 	} else if (size >= 4 && is_valid_rel(size)) {
 		/* brc 0xf,size */
@@ -507,27 +540,78 @@ static void bpf_skip(struct bpf_jit *jit, int size)
 }
 
 /*
+ * PLT for hotpatchable calls. The calling convention is the same as for the
+ * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
+ */
+struct bpf_plt {
+	char code[16];
+	void *ret;
+	void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
+asm(
+	".pushsection .rodata\n"
+	"	.balign 8\n"
+	"bpf_plt:\n"
+	"	lgrl %r0,bpf_plt_ret\n"
+	"	lgrl %r1,bpf_plt_target\n"
+	"	br %r1\n"
+	"	.balign 8\n"
+	"bpf_plt_ret: .quad 0\n"
+	"bpf_plt_target: .quad 0\n"
+	"	.popsection\n"
+);
+
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
+{
+	memcpy(plt, &bpf_plt, sizeof(*plt));
+	plt->ret = ret;
+	plt->target = target;
+}
+
+/*
  * Emit function prologue
  *
  * Save registers and create stack frame if necessary.
- * See stack frame layout desription in "bpf_jit.h"!
+ * See stack frame layout description in "bpf_jit.h"!
  */
-static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+			     u32 stack_depth)
 {
-	if (jit->seen & SEEN_TAIL_CALL) {
+	/* No-op for hotpatching */
+	/* brcl 0,prologue_plt */
+	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+	jit->prologue_plt_ret = jit->prg;
+
+	if (!bpf_is_subprog(fp)) {
+		/* Initialize the tail call counter in the main program. */
 		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
 		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
 	} else {
 		/*
-		 * There are no tail calls. Insert nops in order to have
-		 * tail_call_start at a predictable offset.
+		 * Skip the tail call counter initialization in subprograms.
+		 * Insert nops in order to have tail_call_start at a
+		 * predictable offset.
 		 */
 		bpf_skip(jit, 6);
 	}
 	/* Tail calls have to skip above initialization */
 	jit->tail_call_start = jit->prg;
-	/* Save registers */
-	save_restore_regs(jit, REGS_SAVE, stack_depth);
+	if (fp->aux->exception_cb) {
+		/*
+		 * Switch stack, the new address is in the 2nd parameter.
+		 *
+		 * Arrange the restoration of %r6-%r15 in the epilogue.
+		 * Do not restore them now, the prog does not need them.
+		 */
+		/* lgr %r15,%r3 */
+		EMIT4(0xb9040000, REG_15, REG_3);
+		jit->seen_regs |= NVREGS;
+	} else {
+		/* Save registers */
+		save_restore_regs(jit, REGS_SAVE, stack_depth,
+				  fp->aux->exception_boundary ? NVREGS : 0);
+	}
 	/* Setup literal pool */
 	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
 		if (!is_first_pass(jit) &&
@@ -543,21 +627,46 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 	}
 	/* Setup stack and backchain */
 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
-		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
-			/* lgr %w1,%r15 (backchain) */
-			EMIT4(0xb9040000, REG_W1, REG_15);
+		/* lgr %w1,%r15 (backchain) */
+		EMIT4(0xb9040000, REG_W1, REG_15);
 		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
 		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
 		/* aghi %r15,-STK_OFF */
 		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
-		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
-			/* stg %w1,152(%r15) (backchain) */
-			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
-				      REG_15, 152);
+		/* stg %w1,152(%r15) (backchain) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+			      REG_15, 152);
 	}
 }
 
 /*
+ * Jump using a register either directly or via an expoline thunk
+ */
+#define EMIT_JUMP_REG(reg) do {						\
+	if (nospec_uses_trampoline())					\
+		/* brcl 0xf,__s390_indirect_jump_rN */			\
+		EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f,			\
+				     __s390_indirect_jump_r ## reg);	\
+	else								\
+		/* br %rN */						\
+		_EMIT2(0x07f0 | reg);					\
+} while (0)
+
+/*
+ * Call r1 either directly or via __s390_indirect_jump_r1 thunk
+ */
+static void call_r1(struct bpf_jit *jit)
+{
+	if (nospec_uses_trampoline())
+		/* brasl %r14,__s390_indirect_jump_r1 */
+		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+				     __s390_indirect_jump_r1);
+	else
+		/* basr %r14,%r1 */
+		EMIT2(0x0d00, REG_14, REG_1);
+}
+
+/*
  * Function epilogue
  */
 static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
@@ -566,72 +675,122 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
 	/* Load exit code: lgr %r2,%b0 */
 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
 	/* Restore registers */
-	save_restore_regs(jit, REGS_RESTORE, stack_depth);
-	if (nospec_uses_trampoline()) {
-		jit->r14_thunk_ip = jit->prg;
-		/* Generate __s390_indirect_jump_r14 thunk */
-		/* exrl %r0,.+10 */
-		EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
-		/* j . */
-		EMIT4_PCREL(0xa7f40000, 0);
-	}
-	/* br %r14 */
-	_EMIT2(0x07fe);
-
-	if ((nospec_uses_trampoline()) &&
-	    (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
-		jit->r1_thunk_ip = jit->prg;
-		/* Generate __s390_indirect_jump_r1 thunk */
-		/* exrl %r0,.+10 */
-		EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
-		/* j . */
-		EMIT4_PCREL(0xa7f40000, 0);
-		/* br %r1 */
-		_EMIT2(0x07f1);
+	save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
+	EMIT_JUMP_REG(14);
+
+	jit->prg = ALIGN(jit->prg, 8);
+	jit->prologue_plt = jit->prg;
+	if (jit->prg_buf)
+		bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
+			    jit->prg_buf + jit->prologue_plt_ret, NULL);
+	jit->prg += sizeof(struct bpf_plt);
+}
+
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
+{
+	regs->psw.addr = extable_fixup(x);
+	if (x->data != -1)
+		regs->gprs[x->data] = 0;
+	return true;
+}
+
+/*
+ * A single BPF probe instruction
+ */
+struct bpf_jit_probe {
+	int prg;	/* JITed instruction offset */
+	int nop_prg;	/* JITed nop offset */
+	int reg;	/* Register to clear on exception */
+	int arena_reg;	/* Register to use for arena addressing */
+};
+
+static void bpf_jit_probe_init(struct bpf_jit_probe *probe)
+{
+	probe->prg = -1;
+	probe->nop_prg = -1;
+	probe->reg = -1;
+	probe->arena_reg = REG_0;
+}
+
+/*
+ * Handlers of certain exceptions leave psw.addr pointing to the instruction
+ * directly after the failing one. Therefore, create two exception table
+ * entries and also add a nop in case two probing instructions come directly
+ * after each other.
+ */
+static void bpf_jit_probe_emit_nop(struct bpf_jit *jit,
+				   struct bpf_jit_probe *probe)
+{
+	if (probe->prg == -1 || probe->nop_prg != -1)
+		/* The probe is not armed or nop is already emitted. */
+		return;
+
+	probe->nop_prg = jit->prg;
+	/* bcr 0,%r0 */
+	_EMIT2(0x0700);
+}
+
+static void bpf_jit_probe_load_pre(struct bpf_jit *jit, struct bpf_insn *insn,
+				   struct bpf_jit_probe *probe)
+{
+	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
+	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+	    BPF_MODE(insn->code) != BPF_PROBE_MEM32)
+		return;
+
+	if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+		/* lgrl %r1,kern_arena */
+		EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+		probe->arena_reg = REG_W1;
 	}
+	probe->prg = jit->prg;
+	probe->reg = reg2hex[insn->dst_reg];
 }
 
-static int get_probe_mem_regno(const u8 *insn)
+static void bpf_jit_probe_store_pre(struct bpf_jit *jit, struct bpf_insn *insn,
+				    struct bpf_jit_probe *probe)
 {
-	/*
-	 * insn must point to llgc, llgh, llgf or lg, which have destination
-	 * register at the same position.
-	 */
-	if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
-		return -1;
-	if (insn[5] != 0x90 && /* llgc */
-	    insn[5] != 0x91 && /* llgh */
-	    insn[5] != 0x16 && /* llgf */
-	    insn[5] != 0x04) /* lg */
-		return -1;
-	return insn[1] >> 4;
+	if (BPF_MODE(insn->code) != BPF_PROBE_MEM32)
+		return;
+
+	/* lgrl %r1,kern_arena */
+	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+	probe->arena_reg = REG_W1;
+	probe->prg = jit->prg;
 }
 
-bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
+static void bpf_jit_probe_atomic_pre(struct bpf_jit *jit,
+				     struct bpf_insn *insn,
+				     struct bpf_jit_probe *probe)
 {
-	regs->psw.addr = extable_fixup(x);
-	regs->gprs[x->data] = 0;
-	return true;
+	if (BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
+		return;
+
+	/* lgrl %r1,kern_arena */
+	EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+	/* agr %r1,%dst */
+	EMIT4(0xb9080000, REG_W1, insn->dst_reg);
+	probe->arena_reg = REG_W1;
+	probe->prg = jit->prg;
 }
 
-static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
-			     int probe_prg, int nop_prg)
+static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
+			      struct bpf_jit_probe *probe)
 {
 	struct exception_table_entry *ex;
-	int reg, prg;
+	int i, prg;
 	s64 delta;
 	u8 *insn;
-	int i;
 
+	if (probe->prg == -1)
+		/* The probe is not armed. */
+		return 0;
+	bpf_jit_probe_emit_nop(jit, probe);
 	if (!fp->aux->extable)
 		/* Do nothing during early JIT passes. */
 		return 0;
-	insn = jit->prg_buf + probe_prg;
-	reg = get_probe_mem_regno(insn);
-	if (WARN_ON_ONCE(reg < 0))
-		/* JIT bug - unexpected probe instruction. */
-		return -1;
-	if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+	insn = jit->prg_buf + probe->prg;
+	if (WARN_ON_ONCE(probe->prg + insn_length(*insn) != probe->nop_prg))
 		/* JIT bug - gap between probe and nop instructions. */
 		return -1;
 	for (i = 0; i < 2; i++) {
@@ -640,29 +799,58 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
 			return -1;
 		ex = &fp->aux->extable[jit->excnt];
 		/* Add extable entries for probe and nop instructions. */
-		prg = i == 0 ? probe_prg : nop_prg;
+		prg = i == 0 ? probe->prg : probe->nop_prg;
 		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
 		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
 			/* JIT bug - code and extable must be close. */
 			return -1;
 		ex->insn = delta;
 		/*
-		 * Always land on the nop. Note that extable infrastructure
-		 * ignores fixup field, it is handled by ex_handler_bpf().
+		 * Land on the current instruction. Note that the extable
+		 * infrastructure ignores the fixup field; it is handled by
+		 * ex_handler_bpf().
 		 */
-		delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+		delta = jit->prg_buf + jit->prg - (u8 *)&ex->fixup;
 		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
 			/* JIT bug - landing pad and extable must be close. */
 			return -1;
 		ex->fixup = delta;
 		ex->type = EX_TYPE_BPF;
-		ex->data = reg;
+		ex->data = probe->reg;
 		jit->excnt++;
 	}
 	return 0;
 }
 
 /*
+ * Sign-extend the register if necessary
+ */
+static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
+{
+	if (!(flags & BTF_FMODEL_SIGNED_ARG))
+		return 0;
+
+	switch (size) {
+	case 1:
+		/* lgbr %r,%r */
+		EMIT4(0xb9060000, r, r);
+		return 0;
+	case 2:
+		/* lghr %r,%r */
+		EMIT4(0xb9070000, r, r);
+		return 0;
+	case 4:
+		/* lgfr %r,%r */
+		EMIT4(0xb9140000, r, r);
+		return 0;
+	case 8:
+		return 0;
+	default:
+		return -1;
+	}
+}
+
+/*
  * Compile one eBPF instruction into s390x code
  *
  * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
@@ -672,34 +860,80 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 				 int i, bool extra_pass, u32 stack_depth)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
+	s32 branch_oc_off = insn->off;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
+	struct bpf_jit_probe probe;
 	int last, insn_count = 1;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
-	int probe_prg = -1;
 	unsigned int mask;
-	int nop_prg;
 	int err;
 
-	if (BPF_CLASS(insn->code) == BPF_LDX &&
-	    BPF_MODE(insn->code) == BPF_PROBE_MEM)
-		probe_prg = jit->prg;
+	bpf_jit_probe_init(&probe);
 
 	switch (insn->code) {
 	/*
 	 * BPF_MOV
 	 */
-	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
-		/* llgfr %dst,%src */
-		EMIT4(0xb9160000, dst_reg, src_reg);
-		if (insn_is_zext(&insn[1]))
-			insn_count = 2;
+	case BPF_ALU | BPF_MOV | BPF_X:
+		switch (insn->off) {
+		case 0: /* DST = (u32) SRC */
+			/* llgfr %dst,%src */
+			EMIT4(0xb9160000, dst_reg, src_reg);
+			if (insn_is_zext(&insn[1]))
+				insn_count = 2;
+			break;
+		case 8: /* DST = (u32)(s8) SRC */
+			/* lbr %dst,%src */
+			EMIT4(0xb9260000, dst_reg, src_reg);
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 16: /* DST = (u32)(s16) SRC */
+			/* lhr %dst,%src */
+			EMIT4(0xb9270000, dst_reg, src_reg);
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		}
 		break;
-	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
-		/* lgr %dst,%src */
-		EMIT4(0xb9040000, dst_reg, src_reg);
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		if (insn_is_cast_user(insn)) {
+			int patch_brc;
+
+			/* ltgr %dst,%src */
+			EMIT4(0xb9020000, dst_reg, src_reg);
+			/* brc 8,0f */
+			patch_brc = jit->prg;
+			EMIT4_PCREL_RIC(0xa7040000, 8, 0);
+			/* iihf %dst,user_arena>>32 */
+			EMIT6_IMM(0xc0080000, dst_reg, jit->user_arena >> 32);
+			/* 0: */
+			if (jit->prg_buf)
+				*(u16 *)(jit->prg_buf + patch_brc + 2) =
+					(jit->prg - patch_brc) >> 1;
+			break;
+		}
+		switch (insn->off) {
+		case 0: /* DST = SRC */
+			/* lgr %dst,%src */
+			EMIT4(0xb9040000, dst_reg, src_reg);
+			break;
+		case 8: /* DST = (s8) SRC */
+			/* lgbr %dst,%src */
+			EMIT4(0xb9060000, dst_reg, src_reg);
+			break;
+		case 16: /* DST = (s16) SRC */
+			/* lghr %dst,%src */
+			EMIT4(0xb9070000, dst_reg, src_reg);
+			break;
+		case 32: /* DST = (s32) SRC */
+			/* lgfr %dst,%src */
+			EMIT4(0xb9140000, dst_reg, src_reg);
+			break;
+		}
 		break;
 	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
 		/* llilf %dst,imm */
@@ -808,66 +1042,115 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	/*
 	 * BPF_DIV / BPF_MOD
 	 */
-	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
-	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+	case BPF_ALU | BPF_DIV | BPF_X:
+	case BPF_ALU | BPF_MOD | BPF_X:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
-		/* lhi %w0,0 */
-		EMIT4_IMM(0xa7080000, REG_W0, 0);
-		/* lr %w1,%dst */
-		EMIT2(0x1800, REG_W1, dst_reg);
-		/* dlr %w0,%src */
-		EMIT4(0xb9970000, REG_W0, src_reg);
+		switch (off) {
+		case 0: /* dst = (u32) dst {/,%} (u32) src */
+			/* xr %w0,%w0 */
+			EMIT2(0x1700, REG_W0, REG_W0);
+			/* lr %w1,%dst */
+			EMIT2(0x1800, REG_W1, dst_reg);
+			/* dlr %w0,%src */
+			EMIT4(0xb9970000, REG_W0, src_reg);
+			break;
+		case 1: /* dst = (u32) ((s32) dst {/,%} (s32) src) */
+			/* lgfr %r1,%dst */
+			EMIT4(0xb9140000, REG_W1, dst_reg);
+			/* dsgfr %r0,%src */
+			EMIT4(0xb91d0000, REG_W0, src_reg);
+			break;
+		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
 	}
-	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
-	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+	case BPF_ALU64 | BPF_MOD | BPF_X:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
-		/* lghi %w0,0 */
-		EMIT4_IMM(0xa7090000, REG_W0, 0);
-		/* lgr %w1,%dst */
-		EMIT4(0xb9040000, REG_W1, dst_reg);
-		/* dlgr %w0,%dst */
-		EMIT4(0xb9870000, REG_W0, src_reg);
+		switch (off) {
+		case 0: /* dst = dst {/,%} src */
+			/* lghi %w0,0 */
+			EMIT4_IMM(0xa7090000, REG_W0, 0);
+			/* lgr %w1,%dst */
+			EMIT4(0xb9040000, REG_W1, dst_reg);
+			/* dlgr %w0,%src */
+			EMIT4(0xb9870000, REG_W0, src_reg);
+			break;
+		case 1: /* dst = (s64) dst {/,%} (s64) src */
+			/* lgr %w1,%dst */
+			EMIT4(0xb9040000, REG_W1, dst_reg);
+			/* dsgr %w0,%src */
+			EMIT4(0xb90d0000, REG_W0, src_reg);
+			break;
+		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;
 	}
-	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
-	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+	case BPF_ALU | BPF_DIV | BPF_K:
+	case BPF_ALU | BPF_MOD | BPF_K:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
 		if (imm == 1) {
 			if (BPF_OP(insn->code) == BPF_MOD)
-				/* lhgi %dst,0 */
+				/* lghi %dst,0 */
 				EMIT4_IMM(0xa7090000, dst_reg, 0);
 			else
 				EMIT_ZERO(dst_reg);
 			break;
 		}
-		/* lhi %w0,0 */
-		EMIT4_IMM(0xa7080000, REG_W0, 0);
-		/* lr %w1,%dst */
-		EMIT2(0x1800, REG_W1, dst_reg);
 		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
-			/* dl %w0,<d(imm)>(%l) */
-			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
-				      EMIT_CONST_U32(imm));
+			switch (off) {
+			case 0: /* dst = (u32) dst {/,%} (u32) imm */
+				/* xr %w0,%w0 */
+				EMIT2(0x1700, REG_W0, REG_W0);
+				/* lr %w1,%dst */
+				EMIT2(0x1800, REG_W1, dst_reg);
+				/* dl %w0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U32(imm));
+				break;
+			case 1: /* dst = (s32) dst {/,%} (s32) imm */
+				/* lgfr %r1,%dst */
+				EMIT4(0xb9140000, REG_W1, dst_reg);
+				/* dsgf %r0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x001d, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U32(imm));
+				break;
+			}
 		} else {
-			/* lgfrl %dst,imm */
-			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
-					 _EMIT_CONST_U32(imm));
-			jit->seen |= SEEN_LITERAL;
-			/* dlr %w0,%dst */
-			EMIT4(0xb9970000, REG_W0, dst_reg);
+			switch (off) {
+			case 0: /* dst = (u32) dst {/,%} (u32) imm */
+				/* xr %w0,%w0 */
+				EMIT2(0x1700, REG_W0, REG_W0);
+				/* lr %w1,%dst */
+				EMIT2(0x1800, REG_W1, dst_reg);
+				/* lrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc40d0000, dst_reg,
+						 _EMIT_CONST_U32(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dlr %w0,%dst */
+				EMIT4(0xb9970000, REG_W0, dst_reg);
+				break;
+			case 1: /* dst = (s32) dst {/,%} (s32) imm */
+				/* lgfr %w1,%dst */
+				EMIT4(0xb9140000, REG_W1, dst_reg);
+				/* lgfrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+						 _EMIT_CONST_U32(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dsgr %w0,%dst */
+				EMIT4(0xb90d0000, REG_W0, dst_reg);
+				break;
+			}
 		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
@@ -875,8 +1158,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			insn_count = 2;
 		break;
 	}
-	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
-	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+	case BPF_ALU64 | BPF_MOD | BPF_K:
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
@@ -886,21 +1169,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 				EMIT4_IMM(0xa7090000, dst_reg, 0);
 			break;
 		}
-		/* lghi %w0,0 */
-		EMIT4_IMM(0xa7090000, REG_W0, 0);
-		/* lgr %w1,%dst */
-		EMIT4(0xb9040000, REG_W1, dst_reg);
 		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
-			/* dlg %w0,<d(imm)>(%l) */
-			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-				      EMIT_CONST_U64(imm));
+			switch (off) {
+			case 0: /* dst = dst {/,%} imm */
+				/* lghi %w0,0 */
+				EMIT4_IMM(0xa7090000, REG_W0, 0);
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* dlg %w0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U64(imm));
+				break;
+			case 1: /* dst = (s64) dst {/,%} (s64) imm */
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* dsg %w0,<d(imm)>(%l) */
+				EMIT6_DISP_LH(0xe3000000, 0x000d, REG_W0, REG_0,
+					      REG_L, EMIT_CONST_U64(imm));
+				break;
+			}
 		} else {
-			/* lgrl %dst,imm */
-			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
-					 _EMIT_CONST_U64(imm));
-			jit->seen |= SEEN_LITERAL;
-			/* dlgr %w0,%dst */
-			EMIT4(0xb9870000, REG_W0, dst_reg);
+			switch (off) {
+			case 0: /* dst = dst {/,%} imm */
+				/* lghi %w0,0 */
+				EMIT4_IMM(0xa7090000, REG_W0, 0);
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* lgrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+						 _EMIT_CONST_U64(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dlgr %w0,%dst */
+				EMIT4(0xb9870000, REG_W0, dst_reg);
+				break;
+			case 1: /* dst = (s64) dst {/,%} (s64) imm */
+				/* lgr %w1,%dst */
+				EMIT4(0xb9040000, REG_W1, dst_reg);
+				/* lgrl %dst,imm */
+				EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+						 _EMIT_CONST_U64(imm));
+				jit->seen |= SEEN_LITERAL;
+				/* dsgr %w0,%dst */
+				EMIT4(0xb90d0000, REG_W0, dst_reg);
+				break;
+			}
 		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
@@ -1113,6 +1425,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		}
 		break;
 	case BPF_ALU | BPF_END | BPF_FROM_LE:
+	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
 		switch (imm) {
 		case 16: /* dst = (u16) cpu_to_le16(dst) */
 			/* lrvr %dst,%dst */
@@ -1146,51 +1459,99 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 * BPF_ST(X)
 	 */
 	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
-		/* stcy %src,off(%dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* stcy %src,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
-		/* sthy %src,off(%dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* sthy %src,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
-		/* sty %src,off(%dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* sty %src,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
-		/* stg %src,off(%dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* stg %src,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
 		/* lhi %w0,imm */
 		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
-		/* stcy %w0,off(dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* stcy %w0,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
 		/* lhi %w0,imm */
 		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
-		/* sthy %w0,off(dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* sthy %w0,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
 		/* llilf %w0,imm  */
 		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
-		/* sty %w0,off(%dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* sty %w0,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
 		/* lgfi %w0,imm */
 		EMIT6_IMM(0xc0010000, REG_W0, imm);
-		/* stg %w0,off(%dst) */
-		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+		bpf_jit_probe_store_pre(jit, insn, &probe);
+		/* stg %w0,off(%dst,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		break;
 	/*
@@ -1198,17 +1559,36 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 */
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
 	case BPF_STX | BPF_ATOMIC | BPF_W:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
 	{
 		bool is32 = BPF_SIZE(insn->code) == BPF_W;
 
+		/*
+		 * Unlike loads and stores, atomics have only a base register,
+		 * but no index register. For the non-arena case, simply use
+		 * %dst as a base. For the arena case, use the work register
+		 * %r1: first, load the arena base into it, and then add %dst
+		 * to it.
+		 */
+		probe.arena_reg = dst_reg;
+
 		switch (insn->imm) {
-/* {op32|op64} {%w0|%src},%src,off(%dst) */
 #define EMIT_ATOMIC(op32, op64) do {					\
+	bpf_jit_probe_atomic_pre(jit, insn, &probe);			\
+	/* {op32|op64} {%w0|%src},%src,off(%arena) */			\
 	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
 		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
-		      src_reg, dst_reg, off);				\
-	if (is32 && (insn->imm & BPF_FETCH))				\
-		EMIT_ZERO(src_reg);					\
+		      src_reg, probe.arena_reg, off);			\
+	err = bpf_jit_probe_post(jit, fp, &probe);			\
+	if (err < 0)							\
+		return err;						\
+	if (insn->imm & BPF_FETCH) {					\
+		/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */	\
+		_EMIT2(0x07e0);						\
+		if (is32)                                               \
+			EMIT_ZERO(src_reg);				\
+	}								\
 } while (0)
 		case BPF_ADD:
 		case BPF_ADD | BPF_FETCH:
@@ -1231,25 +1611,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			EMIT_ATOMIC(0x00f7, 0x00e7);
 			break;
 #undef EMIT_ATOMIC
-		case BPF_XCHG:
-			/* {ly|lg} %w0,off(%dst) */
+		case BPF_XCHG: {
+			struct bpf_jit_probe load_probe = probe;
+			int loop_start;
+
+			bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
+			/* {ly|lg} %w0,off(%arena) */
 			EMIT6_DISP_LH(0xe3000000,
 				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
-				      dst_reg, off);
-			/* 0: {csy|csg} %w0,%src,off(%dst) */
+				      load_probe.arena_reg, off);
+			bpf_jit_probe_emit_nop(jit, &load_probe);
+			/* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
+			if (load_probe.prg != -1) {
+				probe.prg = jit->prg;
+				probe.arena_reg = load_probe.arena_reg;
+			}
+			loop_start = jit->prg;
+			/* 0: {csy|csg} %w0,%src,off(%arena) */
 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
-				      REG_W0, src_reg, dst_reg, off);
+				      REG_W0, src_reg, probe.arena_reg, off);
+			bpf_jit_probe_emit_nop(jit, &probe);
 			/* brc 4,0b */
-			EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
+			EMIT4_PCREL_RIC(0xa7040000, 4, loop_start);
 			/* {llgfr|lgr} %src,%w0 */
 			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
+			/* Both probes should land here on exception. */
+			err = bpf_jit_probe_post(jit, fp, &load_probe);
+			if (err < 0)
+				return err;
+			err = bpf_jit_probe_post(jit, fp, &probe);
+			if (err < 0)
+				return err;
 			if (is32 && insn_is_zext(&insn[1]))
 				insn_count = 2;
 			break;
+		}
 		case BPF_CMPXCHG:
-			/* 0: {csy|csg} %b0,%src,off(%dst) */
+			bpf_jit_probe_atomic_pre(jit, insn, &probe);
+			/* 0: {csy|csg} %b0,%src,off(%arena) */
 			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
-				      BPF_REG_0, src_reg, dst_reg, off);
+				      BPF_REG_0, src_reg,
+				      probe.arena_reg, off);
+			err = bpf_jit_probe_post(jit, fp, &probe);
+			if (err < 0)
+				return err;
 			break;
 		default:
 			pr_err("Unknown atomic operation %02x\n", insn->imm);
@@ -1264,42 +1669,97 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 */
 	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
-		/* llgc %dst,0(off,%src) */
-		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
+		/* llgc %dst,off(%src,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
+	case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
+		/* lgb %dst,off(%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
+		jit->seen |= SEEN_MEM;
+		break;
 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
-		/* llgh %dst,0(off,%src) */
-		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
+		/* llgh %dst,off(%src,%arena) */
+		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		jit->seen |= SEEN_MEM;
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
+	case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
+		/* lgh %dst,off(%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
+		jit->seen |= SEEN_MEM;
+		break;
 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
 		/* llgf %dst,off(%src) */
 		jit->seen |= SEEN_MEM;
-		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		if (insn_is_zext(&insn[1]))
 			insn_count = 2;
 		break;
+	case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
+		/* lgf %dst,off(%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
+		break;
 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
-		/* lg %dst,0(off,%src) */
+	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+		bpf_jit_probe_load_pre(jit, insn, &probe);
+		/* lg %dst,off(%src,%arena) */
 		jit->seen |= SEEN_MEM;
-		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg,
+			      probe.arena_reg, off);
+		err = bpf_jit_probe_post(jit, fp, &probe);
+		if (err < 0)
+			return err;
 		break;
 	/*
 	 * BPF_JMP / CALL
 	 */
 	case BPF_JMP | BPF_CALL:
 	{
-		u64 func;
+		const struct btf_func_model *m;
 		bool func_addr_fixed;
-		int ret;
+		int j, ret;
+		u64 func;
 
 		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
 					    &func, &func_addr_fixed);
@@ -1308,15 +1768,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 
 		REG_SET_SEEN(BPF_REG_5);
 		jit->seen |= SEEN_FUNC;
+		/*
+		 * Copy the tail call counter to where the callee expects it.
+		 *
+		 * Note 1: The callee can increment the tail call counter, but
+		 * we do not load it back, since the x86 JIT does not do this
+		 * either.
+		 *
+		 * Note 2: We assume that the verifier does not let us call the
+		 * main program, which clears the tail call counter on entry.
+		 */
+		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
+		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
+		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+
+		/* Sign-extend the kfunc arguments. */
+		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+			m = bpf_jit_find_kfunc_model(fp, insn);
+			if (!m)
+				return -1;
+
+			for (j = 0; j < m->nr_args; j++) {
+				if (sign_extend(jit, BPF_REG_1 + j,
+						m->arg_size[j],
+						m->arg_flags[j]))
+					return -1;
+			}
+		}
+
 		/* lgrl %w1,func */
 		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
-		if (nospec_uses_trampoline()) {
-			/* brasl %r14,__s390_indirect_jump_r1 */
-			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
-		} else {
-			/* basr %r14,%w1 */
-			EMIT2(0x0d00, REG_14, REG_W1);
-		}
+		/* %r1() */
+		call_r1(jit);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
 		break;
@@ -1329,10 +1812,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		 *  B1: pointer to ctx
 		 *  B2: pointer to bpf_array
 		 *  B3: index in bpf_array
-		 */
-		jit->seen |= SEEN_TAIL_CALL;
-
-		/*
+		 *
 		 * if (index >= array->map.max_entries)
 		 *         goto out;
 		 */
@@ -1384,7 +1864,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		/*
 		 * Restore registers before calling function
 		 */
-		save_restore_regs(jit, REGS_RESTORE, stack_depth);
+		save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
 
 		/*
 		 * goto *(prog->bpf_func + tail_call_start);
@@ -1393,8 +1873,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		/* lg %r1,bpf_func(%r1) */
 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
 			      offsetof(struct bpf_prog, bpf_func));
-		/* bc 0xf,tail_call_start(%r1) */
-		_EMIT4(0x47f01000 + jit->tail_call_start);
+		if (nospec_uses_trampoline()) {
+			jit->seen |= SEEN_FUNC;
+			/* aghi %r1,tail_call_start */
+			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
+			/* brcl 0xf,__s390_indirect_jump_r1 */
+			EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+					     __s390_indirect_jump_r1);
+		} else {
+			/* bc 0xf,tail_call_start(%r1) */
+			_EMIT4(0x47f01000 + jit->tail_call_start);
+		}
 		/* out: */
 		if (jit->prg_buf) {
 			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
@@ -1437,6 +1926,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 * instruction itself (loop) and for BPF with offset 0 we
 	 * branch to the instruction behind the branch.
 	 */
+	case BPF_JMP32 | BPF_JA: /* if (true) */
+		branch_oc_off = imm;
+		fallthrough;
 	case BPF_JMP | BPF_JA: /* if (true) */
 		mask = 0xf000; /* j */
 		goto branch_oc;
@@ -1605,14 +2097,16 @@ branch_xu:
 		break;
 branch_oc:
 		if (!is_first_pass(jit) &&
-		    can_use_rel(jit, addrs[i + off + 1])) {
+		    can_use_rel(jit, addrs[i + branch_oc_off + 1])) {
 			/* brc mask,off */
 			EMIT4_PCREL_RIC(0xa7040000,
-					mask >> 12, addrs[i + off + 1]);
+					mask >> 12,
+					addrs[i + branch_oc_off + 1]);
 		} else {
 			/* brcl mask,off */
 			EMIT6_PCREL_RILC(0xc0040000,
-					 mask >> 12, addrs[i + off + 1]);
+					 mask >> 12,
+					 addrs[i + branch_oc_off + 1]);
 		}
 		break;
 	}
@@ -1621,22 +2115,6 @@ branch_oc:
 		return -1;
 	}
 
-	if (probe_prg != -1) {
-		/*
-		 * Handlers of certain exceptions leave psw.addr pointing to
-		 * the instruction directly after the failing one. Therefore,
-		 * create two exception table entries and also add a nop in
-		 * case two probing instructions come directly after each
-		 * other.
-		 */
-		nop_prg = jit->prg;
-		/* bcr 0,%0 */
-		_EMIT2(0x0700);
-		err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
-		if (err < 0)
-			return err;
-	}
-
 	return insn_count;
 }
 
@@ -1682,13 +2160,19 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
 			bool extra_pass, u32 stack_depth)
 {
 	int i, insn_count, lit32_size, lit64_size;
+	u64 kern_arena;
 
 	jit->lit32 = jit->lit32_start;
 	jit->lit64 = jit->lit64_start;
 	jit->prg = 0;
 	jit->excnt = 0;
 
-	bpf_jit_prologue(jit, stack_depth);
+	kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
+	if (kern_arena)
+		jit->kern_arena = _EMIT_CONST_U64(kern_arena);
+	jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
+
+	bpf_jit_prologue(jit, fp, stack_depth);
 	if (bpf_set_addr(jit, 0) < 0)
 		return -1;
 	for (i = 0; i < fp->len; i += insn_count) {
@@ -1735,9 +2219,25 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
 					       struct bpf_prog *fp)
 {
 	struct bpf_binary_header *header;
+	struct bpf_insn *insn;
 	u32 extable_size;
 	u32 code_size;
+	int i;
 
+	for (i = 0; i < fp->len; i++) {
+		insn = &fp->insnsi[i];
+
+		if (BPF_CLASS(insn->code) == BPF_STX &&
+		    BPF_MODE(insn->code) == BPF_PROBE_ATOMIC &&
+		    (BPF_SIZE(insn->code) == BPF_DW ||
+		     BPF_SIZE(insn->code) == BPF_W) &&
+		    insn->imm == BPF_XCHG)
+			/*
+			 * bpf_jit_insn() emits a load and a compare-and-swap,
+			 * both of which need to be probed.
+			 */
+			fp->aux->num_exentries += 1;
+	}
 	/* We need two entries per insn. */
 	fp->aux->num_exentries *= 2;
 
@@ -1836,7 +2336,11 @@ skip_init_ctx:
 		print_fn_code(jit.prg_buf, jit.size_prg);
 	}
 	if (!fp->is_func || extra_pass) {
-		bpf_jit_binary_lock_ro(header);
+		if (bpf_jit_binary_lock_ro(header)) {
+			bpf_jit_binary_free(header);
+			fp = orig_fp;
+			goto free_addrs;
+		}
 	} else {
 		jit_data->header = header;
 		jit_data->ctx = jit;
@@ -1859,3 +2363,600 @@ out:
 					   tmp : orig_fp);
 	return fp;
 }
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+	return true;
+}
+
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+	return true;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+		       void *old_addr, void *new_addr)
+{
+	struct bpf_plt expected_plt, current_plt, new_plt, *plt;
+	struct {
+		u16 opc;
+		s32 disp;
+	} __packed insn;
+	char *ret;
+	int err;
+
+	/* Verify the branch to be patched. */
+	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
+	if (err < 0)
+		return err;
+	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
+		return -EINVAL;
+
+	if (t == BPF_MOD_JUMP &&
+	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
+		/*
+		 * The branch already points to the destination,
+		 * there is no PLT.
+		 */
+	} else {
+		/* Verify the PLT. */
+		plt = ip + (insn.disp << 1);
+		err = copy_from_kernel_nofault(&current_plt, plt,
+					       sizeof(current_plt));
+		if (err < 0)
+			return err;
+		ret = (char *)ip + 6;
+		bpf_jit_plt(&expected_plt, ret, old_addr);
+		if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
+			return -EINVAL;
+		/* Adjust the call address. */
+		bpf_jit_plt(&new_plt, ret, new_addr);
+		s390_kernel_write(&plt->target, &new_plt.target,
+				  sizeof(void *));
+	}
+
+	/* Adjust the mask of the branch. */
+	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
+	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
+
+	/* Make the new code visible to the other CPUs. */
+	text_poke_sync_lock();
+
+	return 0;
+}
+
+struct bpf_tramp_jit {
+	struct bpf_jit common;
+	int orig_stack_args_off;/* Offset of arguments placed on stack by the
+				 * func_addr's original caller
+				 */
+	int stack_size;		/* Trampoline stack size */
+	int backchain_off;	/* Offset of backchain */
+	int stack_args_off;	/* Offset of stack arguments for calling
+				 * func_addr, has to be at the top
+				 */
+	int reg_args_off;	/* Offset of register arguments for calling
+				 * func_addr
+				 */
+	int ip_off;		/* For bpf_get_func_ip(), has to be at
+				 * (ctx - 16)
+				 */
+	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at
+				 * (ctx - 8)
+				 */
+	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
+				 * of BPF arguments followed by return value
+				 */
+	int retval_off;		/* Offset of return value (see above) */
+	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
+				 * for __bpf_prog_enter() return value and
+				 * func_addr respectively
+				 */
+	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
+	int tccnt_off;		/* Offset of saved tailcall counter */
+	int r14_off;		/* Offset of saved %r14, has to be at the
+				 * bottom */
+	int do_fexit;		/* do_fexit: label */
+};
+
+static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
+{
+	/* llihf %dst_reg,val_hi */
+	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
+	/* oilf %dst_reg,val_lo */
+	EMIT6_IMM(0xc00d0000, dst_reg, val);
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
+			   const struct btf_func_model *m,
+			   struct bpf_tramp_link *tlink, bool save_ret)
+{
+	struct bpf_jit *jit = &tjit->common;
+	int cookie_off = tjit->run_ctx_off +
+			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+	struct bpf_prog *p = tlink->link.prog;
+	int patch;
+
+	/*
+	 * run_ctx.bpf_cookie = tlink->cookie;
+	 */
+
+	/* %r0 = tlink->cookie */
+	load_imm64(jit, REG_W0, tlink->cookie);
+	/* stg %r0,cookie_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
+
+	/*
+	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
+	 *         goto skip;
+	 */
+
+	/* %r1 = __bpf_prog_enter */
+	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
+	/* %r2 = p */
+	load_imm64(jit, REG_2, (u64)p);
+	/* la %r3,run_ctx_off(%r15) */
+	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
+	/* %r1() */
+	call_r1(jit);
+	/* ltgr %r7,%r2 */
+	EMIT4(0xb9020000, REG_7, REG_2);
+	/* brcl 8,skip */
+	patch = jit->prg;
+	EMIT6_PCREL_RILC(0xc0040000, 8, 0);
+
+	/*
+	 * retval = bpf_func(args, p->insnsi);
+	 */
+
+	/* %r1 = p->bpf_func */
+	load_imm64(jit, REG_1, (u64)p->bpf_func);
+	/* la %r2,bpf_args_off(%r15) */
+	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
+	/* %r3 = p->insnsi */
+	if (!p->jited)
+		load_imm64(jit, REG_3, (u64)p->insnsi);
+	/* %r1() */
+	call_r1(jit);
+	/* stg %r2,retval_off(%r15) */
+	if (save_ret) {
+		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
+			return -1;
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+			      tjit->retval_off);
+	}
+
+	/* skip: */
+	if (jit->prg_buf)
+		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
+
+	/*
+	 * __bpf_prog_exit(p, start, &run_ctx);
+	 */
+
+	/* %r1 = __bpf_prog_exit */
+	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
+	/* %r2 = p */
+	load_imm64(jit, REG_2, (u64)p);
+	/* lgr %r3,%r7 */
+	EMIT4(0xb9040000, REG_3, REG_7);
+	/* la %r4,run_ctx_off(%r15) */
+	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
+	/* %r1() */
+	call_r1(jit);
+
+	return 0;
+}
+
+static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
+{
+	int stack_offset = tjit->stack_size;
+
+	tjit->stack_size += size;
+	return stack_offset;
+}
+
+/* ABI uses %r2 - %r6 for parameter passing. */
+#define MAX_NR_REG_ARGS 5
+
+/* The "L" field of the "mvc" instruction is 8 bits. */
+#define MAX_MVC_SIZE 256
+#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
+
+/* -mfentry generates a 6-byte nop on s390x. */
+#define S390X_PATCH_SIZE 6
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+					 struct bpf_tramp_jit *tjit,
+					 const struct btf_func_model *m,
+					 u32 flags,
+					 struct bpf_tramp_links *tlinks,
+					 void *func_addr)
+{
+	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+	int nr_bpf_args, nr_reg_args, nr_stack_args;
+	struct bpf_jit *jit = &tjit->common;
+	int arg, bpf_arg_off;
+	int i, j;
+
+	/* Support as many stack arguments as "mvc" instruction can handle. */
+	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
+	nr_stack_args = m->nr_args - nr_reg_args;
+	if (nr_stack_args > MAX_NR_STACK_ARGS)
+		return -ENOTSUPP;
+
+	/* Return to %r14 in the struct_ops case. */
+	if (flags & BPF_TRAMP_F_INDIRECT)
+		flags |= BPF_TRAMP_F_SKIP_FRAME;
+
+	/*
+	 * Compute how many arguments we need to pass to BPF programs.
+	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
+	 * smaller are packed into 1 or 2 registers; larger arguments are
+	 * passed via pointers.
+	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
+	 * a register; larger arguments are passed via pointers.
+	 * We need to deal with this difference.
+	 */
+	nr_bpf_args = 0;
+	for (i = 0; i < m->nr_args; i++) {
+		if (m->arg_size[i] <= 8)
+			nr_bpf_args += 1;
+		else if (m->arg_size[i] <= 16)
+			nr_bpf_args += 2;
+		else
+			return -ENOTSUPP;
+	}
+
+	/*
+	 * Calculate the stack layout.
+	 */
+
+	/*
+	 * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
+	 * ABI requires, put our backchain at the end of the allocated memory.
+	 */
+	tjit->stack_size = STACK_FRAME_OVERHEAD;
+	tjit->backchain_off = tjit->stack_size - sizeof(u64);
+	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
+	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
+	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
+	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
+	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
+	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
+	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
+	tjit->run_ctx_off = alloc_stack(tjit,
+					sizeof(struct bpf_tramp_run_ctx));
+	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
+	tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
+	/*
+	 * In accordance with the s390x ABI, the caller has allocated
+	 * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
+	 * backchain, and the rest we can use.
+	 */
+	tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
+	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+
+	/* lgr %r1,%r15 */
+	EMIT4(0xb9040000, REG_1, REG_15);
+	/* aghi %r15,-stack_size */
+	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+	/* stg %r1,backchain_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
+		      tjit->backchain_off);
+	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+	_EMIT6(0xd203f000 | tjit->tccnt_off,
+	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
+	/* stmg %r2,%rN,reg_args_off(%r15) */
+	if (nr_reg_args)
+		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+			      REG_2 + (nr_reg_args - 1), REG_15,
+			      tjit->reg_args_off);
+	for (i = 0, j = 0; i < m->nr_args; i++) {
+		if (i < MAX_NR_REG_ARGS)
+			arg = REG_2 + i;
+		else
+			arg = tjit->orig_stack_args_off +
+			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
+		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
+		if (m->arg_size[i] <= 8) {
+			if (i < MAX_NR_REG_ARGS)
+				/* stg %arg,bpf_arg_off(%r15) */
+				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
+					      REG_0, REG_15, bpf_arg_off);
+			else
+				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
+				_EMIT6(0xd207f000 | bpf_arg_off,
+				       0xf000 | arg);
+			j += 1;
+		} else {
+			if (i < MAX_NR_REG_ARGS) {
+				/* mvc bpf_arg_off(16,%r15),0(%arg) */
+				_EMIT6(0xd20ff000 | bpf_arg_off,
+				       reg2hex[arg] << 12);
+			} else {
+				/* lg %r1,arg(%r15) */
+				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
+					      REG_15, arg);
+				/* mvc bpf_arg_off(16,%r15),0(%r1) */
+				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
+			}
+			j += 2;
+		}
+	}
+	/* stmg %r7,%r8,r7_r8_off(%r15) */
+	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
+		      tjit->r7_r8_off);
+	/* stg %r14,r14_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
+
+	if (flags & BPF_TRAMP_F_ORIG_STACK) {
+		/*
+		 * The ftrace trampoline puts the return address (which is the
+		 * address of the original function + S390X_PATCH_SIZE) into
+		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
+		 * ftrace_init_nop() for details.
+		 */
+
+		/* lgr %r8,%r0 */
+		EMIT4(0xb9040000, REG_8, REG_0);
+	} else {
+		/* %r8 = func_addr + S390X_PATCH_SIZE */
+		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
+	}
+
+	/*
+	 * ip = func_addr;
+	 * arg_cnt = nr_bpf_args;
+	 */
+
+	if (flags & BPF_TRAMP_F_IP_ARG) {
+		/* %r0 = func_addr */
+		load_imm64(jit, REG_0, (u64)func_addr);
+		/* stg %r0,ip_off(%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+			      tjit->ip_off);
+	}
+	/* lghi %r0,nr_bpf_args */
+	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
+	/* stg %r0,arg_cnt_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+		      tjit->arg_cnt_off);
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		/*
+		 * __bpf_tramp_enter(im);
+		 */
+
+		/* %r1 = __bpf_tramp_enter */
+		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
+		/* %r2 = im */
+		load_imm64(jit, REG_2, (u64)im);
+		/* %r1() */
+		call_r1(jit);
+	}
+
+	for (i = 0; i < fentry->nr_links; i++)
+		if (invoke_bpf_prog(tjit, m, fentry->links[i],
+				    flags & BPF_TRAMP_F_RET_FENTRY_RET))
+			return -EINVAL;
+
+	if (fmod_ret->nr_links) {
+		/*
+		 * retval = 0;
+		 */
+
+		/* xc retval_off(8,%r15),retval_off(%r15) */
+		_EMIT6(0xd707f000 | tjit->retval_off,
+		       0xf000 | tjit->retval_off);
+
+		for (i = 0; i < fmod_ret->nr_links; i++) {
+			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+				return -EINVAL;
+
+			/*
+			 * if (retval)
+			 *         goto do_fexit;
+			 */
+
+			/* ltg %r0,retval_off(%r15) */
+			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
+				      tjit->retval_off);
+			/* brcl 7,do_fexit */
+			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
+		}
+	}
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		/*
+		 * retval = func_addr(args);
+		 */
+
+		/* lmg %r2,%rN,reg_args_off(%r15) */
+		if (nr_reg_args)
+			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+				      REG_2 + (nr_reg_args - 1), REG_15,
+				      tjit->reg_args_off);
+		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
+		if (nr_stack_args)
+			_EMIT6(0xd200f000 |
+				       (nr_stack_args * sizeof(u64) - 1) << 16 |
+				       tjit->stack_args_off,
+			       0xf000 | tjit->orig_stack_args_off);
+		/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+		_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
+		/* lgr %r1,%r8 */
+		EMIT4(0xb9040000, REG_1, REG_8);
+		/* %r1() */
+		call_r1(jit);
+		/* stg %r2,retval_off(%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+			      tjit->retval_off);
+
+		im->ip_after_call = jit->prg_buf + jit->prg;
+
+		/*
+		 * The following nop will be patched by bpf_tramp_image_put().
+		 */
+
+		/* brcl 0,im->ip_epilogue */
+		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
+	}
+
+	/* do_fexit: */
+	tjit->do_fexit = jit->prg;
+	for (i = 0; i < fexit->nr_links; i++)
+		if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
+			return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		im->ip_epilogue = jit->prg_buf + jit->prg;
+
+		/*
+		 * __bpf_tramp_exit(im);
+		 */
+
+		/* %r1 = __bpf_tramp_exit */
+		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
+		/* %r2 = im */
+		load_imm64(jit, REG_2, (u64)im);
+		/* %r1() */
+		call_r1(jit);
+	}
+
+	/* lmg %r2,%rN,reg_args_off(%r15) */
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
+		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+			      REG_2 + (nr_reg_args - 1), REG_15,
+			      tjit->reg_args_off);
+	/* lgr %r1,%r8 */
+	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+		EMIT4(0xb9040000, REG_1, REG_8);
+	/* lmg %r7,%r8,r7_r8_off(%r15) */
+	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
+		      tjit->r7_r8_off);
+	/* lg %r14,r14_off(%r15) */
+	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
+	/* lg %r2,retval_off(%r15) */
+	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+			      tjit->retval_off);
+	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+	       0xf000 | tjit->tccnt_off);
+	/* aghi %r15,stack_size */
+	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		EMIT_JUMP_REG(14);
+	else
+		EMIT_JUMP_REG(1);
+
+	return 0;
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *orig_call)
+{
+	struct bpf_tramp_image im;
+	struct bpf_tramp_jit tjit;
+	int ret;
+
+	memset(&tjit, 0, sizeof(tjit));
+
+	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
+					    tlinks, orig_call);
+
+	return ret < 0 ? ret : tjit.common.prg;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+				void *image_end, const struct btf_func_model *m,
+				u32 flags, struct bpf_tramp_links *tlinks,
+				void *func_addr)
+{
+	struct bpf_tramp_jit tjit;
+	int ret;
+
+	/* Compute offsets, check whether the code fits. */
+	memset(&tjit, 0, sizeof(tjit));
+	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+					    tlinks, func_addr);
+
+	if (ret < 0)
+		return ret;
+	if (tjit.common.prg > (char *)image_end - (char *)image)
+		/*
+		 * Use the same error code as for exceeding
+		 * BPF_MAX_TRAMP_LINKS.
+		 */
+		return -E2BIG;
+
+	tjit.common.prg = 0;
+	tjit.common.prg_buf = image;
+	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+					    tlinks, func_addr);
+
+	return ret < 0 ? ret : tjit.common.prg;
+}
+
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+	return true;
+}
+
+bool bpf_jit_supports_arena(void)
+{
+	return true;
+}
+
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+	if (!in_arena)
+		return true;
+	switch (insn->code) {
+	case BPF_STX | BPF_ATOMIC | BPF_B:
+	case BPF_STX | BPF_ATOMIC | BPF_H:
+	case BPF_STX | BPF_ATOMIC | BPF_W:
+	case BPF_STX | BPF_ATOMIC | BPF_DW:
+		if (bpf_atomic_is_load_store(insn))
+			return false;
+	}
+	return true;
+}
+
+bool bpf_jit_supports_exceptions(void)
+{
+	/*
+	 * Exceptions require unwinding support, which is always available,
+	 * because the kernel is always built with backchain.
+	 */
+	return true;
+}
+
+void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
+			 void *cookie)
+{
+	unsigned long addr, prev_addr = 0;
+	struct unwind_state state;
+
+	unwind_for_each_frame(&state, NULL, NULL, 0) {
+		addr = unwind_get_return_address(&state);
+		if (!addr)
+			break;
+		/*
+		 * addr is a return address and state.sp is the value of %r15
+		 * at this address. exception_cb needs %r15 at entry to the
+		 * function containing addr, so take the next state.sp.
+		 *
+		 * There is no bp, and the exception_cb prog does not need one
+		 * to perform a quasi-longjmp. The common code requires a
+		 * non-zero bp, so pass sp there as well.
+		 */
+		if (prev_addr && !consume_fn(cookie, prev_addr, state.sp,
+					     state.sp))
+			break;
+		prev_addr = addr;
+	}
+}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index bf557a1b789c..1810e0944a4e 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,7 +3,8 @@
 # Makefile for the s390 PCI subsystem.
 #
 
-obj-$(CONFIG_PCI)	+= pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI)	+= pci.o pci_irq.o pci_clp.o \
 			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
-			   pci_bus.o
+			   pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o
 obj-$(CONFIG_PCI_IOV)	+= pci_iov.o
+obj-$(CONFIG_SYSFS)	+= pci_sysfs.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index bc980fd313d5..cd6676c2d602 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -28,7 +28,10 @@
 #include <linux/jump_label.h>
 #include <linux/pci.h>
 #include <linux/printk.h>
+#include <linux/lockdep.h>
+#include <linux/list_sort.h>
 
+#include <asm/machine.h>
 #include <asm/isc.h>
 #include <asm/airq.h>
 #include <asm/facility.h>
@@ -42,6 +45,7 @@
 /* list of all detected zpci devices */
 static LIST_HEAD(zpci_list);
 static DEFINE_SPINLOCK(zpci_list_lock);
+static DEFINE_MUTEX(zpci_add_remove_lock);
 
 static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
 static DEFINE_SPINLOCK(zpci_domain_lock);
@@ -61,6 +65,21 @@ DEFINE_STATIC_KEY_FALSE(have_mio);
 
 static struct kmem_cache *zdev_fmb_cache;
 
+/* AEN structures that must be preserved over KVM module re-insertion */
+union zpci_sic_iib *zpci_aipb;
+EXPORT_SYMBOL_GPL(zpci_aipb);
+struct airq_iv *zpci_aif_sbv;
+EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+
+void zpci_zdev_put(struct zpci_dev *zdev)
+{
+	if (!zdev)
+		return;
+	mutex_lock(&zpci_add_remove_lock);
+	kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
+	mutex_unlock(&zpci_add_remove_lock);
+}
+
 struct zpci_dev *get_zdev_by_fid(u32 fid)
 {
 	struct zpci_dev *tmp, *zdev = NULL;
@@ -110,21 +129,26 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
 
 /* Modify PCI: Register I/O address translation parameters */
 int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
-		       u64 base, u64 limit, u64 iota)
+		       u64 base, u64 limit, u64 iota, u8 *status)
 {
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
 	struct zpci_fib fib = {0};
-	u8 cc, status;
+	u8 cc;
 
-	WARN_ON_ONCE(iota & 0x3fff);
 	fib.pba = base;
-	fib.pal = limit;
-	fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
-	cc = zpci_mod_fc(req, &fib, &status);
+	/* Work around off by one in ISM virt device */
+	if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
+		fib.pal = limit + (1 << 12);
+	else
+		fib.pal = limit;
+	fib.iota = iota;
+	fib.gd = zdev->gisa;
+	cc = zpci_mod_fc(req, &fib, status);
 	if (cc)
-		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
 	return cc;
 }
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
 
 /* Modify PCI: Unregister I/O address translation parameters */
 int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
@@ -133,6 +157,8 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
 	struct zpci_fib fib = {0};
 	u8 cc, status;
 
+	fib.gd = zdev->gisa;
+
 	cc = zpci_mod_fc(req, &fib, &status);
 	if (cc)
 		zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
@@ -143,7 +169,9 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
 int zpci_fmb_enable_device(struct zpci_dev *zdev)
 {
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+	struct zpci_iommu_ctrs *ctrs;
 	struct zpci_fib fib = {0};
+	unsigned long flags;
 	u8 cc, status;
 
 	if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
@@ -155,11 +183,20 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
 	WARN_ON((u64) zdev->fmb & 0xf);
 
 	/* reset software counters */
-	atomic64_set(&zdev->allocated_pages, 0);
-	atomic64_set(&zdev->mapped_pages, 0);
-	atomic64_set(&zdev->unmapped_pages, 0);
+	spin_lock_irqsave(&zdev->dom_lock, flags);
+	ctrs = zpci_get_iommu_ctrs(zdev);
+	if (ctrs) {
+		atomic64_set(&ctrs->mapped_pages, 0);
+		atomic64_set(&ctrs->unmapped_pages, 0);
+		atomic64_set(&ctrs->global_rpcits, 0);
+		atomic64_set(&ctrs->sync_map_rpcits, 0);
+		atomic64_set(&ctrs->sync_rpcits, 0);
+	}
+	spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
 
 	fib.fmb_addr = virt_to_phys(zdev->fmb);
+	fib.gd = zdev->gisa;
 	cc = zpci_mod_fc(req, &fib, &status);
 	if (cc) {
 		kmem_cache_free(zdev_fmb_cache, zdev->fmb);
@@ -178,6 +215,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
 	if (!zdev->fmb)
 		return -EINVAL;
 
+	fib.gd = zdev->gisa;
+
 	/* Function measurement is disabled if fmb address is zero */
 	cc = zpci_mod_fc(req, &fib, &status);
 	if (cc == 3) /* Function already gone. */
@@ -225,68 +264,25 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 	return 0;
 }
 
-/* combine single writes by using store-block insn */
-void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+			   pgprot_t prot)
 {
-       zpci_memcpy_toio(to, from, count);
-}
-
-static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
-{
-	unsigned long offset, vaddr;
-	struct vm_struct *area;
-	phys_addr_t last_addr;
-
-	last_addr = addr + size - 1;
-	if (!size || last_addr < addr)
-		return NULL;
-
+	/*
+	 * When PCI MIO instructions are unavailable the "physical" address
+	 * encodes a hint for accessing the PCI memory space it represents.
+	 * Just pass it unchanged such that ioread/iowrite can decode it.
+	 */
 	if (!static_branch_unlikely(&have_mio))
-		return (void __iomem *) addr;
+		return (void __iomem *)phys_addr;
 
-	offset = addr & ~PAGE_MASK;
-	addr &= PAGE_MASK;
-	size = PAGE_ALIGN(size + offset);
-	area = get_vm_area(size, VM_IOREMAP);
-	if (!area)
-		return NULL;
-
-	vaddr = (unsigned long) area->addr;
-	if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
-		free_vm_area(area);
-		return NULL;
-	}
-	return (void __iomem *) ((unsigned long) area->addr + offset);
-}
-
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
-{
-	return __ioremap(addr, size, __pgprot(prot));
+	return generic_ioremap_prot(phys_addr, size, prot);
 }
 EXPORT_SYMBOL(ioremap_prot);
 
-void __iomem *ioremap(phys_addr_t addr, size_t size)
-{
-	return __ioremap(addr, size, PAGE_KERNEL);
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
-{
-	return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
-}
-EXPORT_SYMBOL(ioremap_wc);
-
-void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
-{
-	return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
-}
-EXPORT_SYMBOL(ioremap_wt);
-
 void iounmap(volatile void __iomem *addr)
 {
 	if (static_branch_likely(&have_mio))
-		vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
+		generic_iounmap(addr);
 }
 EXPORT_SYMBOL(iounmap);
 
@@ -531,8 +527,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
 	return r;
 }
 
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
-			     struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
 {
 	unsigned long addr, size, flags;
 	struct resource *res;
@@ -568,7 +563,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
 			return -ENOMEM;
 		}
 		zdev->bars[i].res = res;
-		pci_add_resource(resources, res);
 	}
 	zdev->has_resources = 1;
 
@@ -577,17 +571,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
 
 static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 {
+	struct resource *res;
 	int i;
 
+	pci_lock_rescan_remove();
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-		if (!zdev->bars[i].size || !zdev->bars[i].res)
+		res = zdev->bars[i].res;
+		if (!res)
 			continue;
 
+		release_resource(res);
+		pci_bus_remove_resource(zdev->zbus->bus, res);
 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
-		release_resource(zdev->bars[i].res);
-		kfree(zdev->bars[i].res);
+		zdev->bars[i].res = NULL;
+		kfree(res);
 	}
 	zdev->has_resources = 0;
+	pci_unlock_rescan_remove();
 }
 
 int pcibios_device_add(struct pci_dev *pdev)
@@ -601,8 +601,6 @@ int pcibios_device_add(struct pci_dev *pdev)
 	if (pdev->is_physfn)
 		pdev->no_vf_scan = 1;
 
-	pdev->dev.groups = zpci_attr_groups;
-	pdev->dev.dma_ops = &s390_pci_dma_ops;
 	zpci_map_resources(pdev);
 
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -700,6 +698,24 @@ int zpci_enable_device(struct zpci_dev *zdev)
 		zpci_update_fh(zdev, fh);
 	return rc;
 }
+EXPORT_SYMBOL_GPL(zpci_enable_device);
+
+int zpci_reenable_device(struct zpci_dev *zdev)
+{
+	u8 status;
+	int rc;
+
+	rc = zpci_enable_device(zdev);
+	if (rc)
+		return rc;
+
+	rc = zpci_iommu_register_ioat(zdev, &status);
+	if (rc)
+		zpci_disable_device(zdev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_reenable_device);
 
 int zpci_disable_device(struct zpci_dev *zdev)
 {
@@ -723,6 +739,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
 	}
 	return rc;
 }
+EXPORT_SYMBOL_GPL(zpci_disable_device);
 
 /**
  * zpci_hot_reset_device - perform a reset of the given zPCI function
@@ -738,12 +755,12 @@ int zpci_disable_device(struct zpci_dev *zdev)
  * equivalent to its state during boot when first probing a driver.
  * Consequently after reset the PCI function requires re-initialization via the
  * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
- * and enabling the function via e.g.pci_enablde_device_flags().The caller
+ * and enabling the function via e.g. pci_enable_device_flags(). The caller
  * must guard against concurrent reset attempts.
  *
  * In most cases this function should not be called directly but through
  * pci_reset_function() or pci_reset_bus() which handle the save/restore and
- * locking.
+ * locking - holding zdev->state_lock is asserted by lockdep.
  *
  * Return: 0 on success and an error value otherwise
  */
@@ -751,6 +768,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
 {
 	int rc;
 
+	lockdep_assert_held(&zdev->state_lock);
 	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
 	if (zdev_enabled(zdev)) {
 		/* Disables device access, DMAs and IRQs (reset state) */
@@ -766,21 +784,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
 			return rc;
 	}
 
-	rc = zpci_enable_device(zdev);
-	if (rc)
-		return rc;
+	rc = zpci_reenable_device(zdev);
 
-	if (zdev->dma_table)
-		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-					virt_to_phys(zdev->dma_table));
-	else
-		rc = zpci_dma_init_device(zdev);
-	if (rc) {
-		zpci_disable_device(zdev);
-		return rc;
-	}
-
-	return 0;
+	return rc;
 }
 
 /**
@@ -789,8 +795,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
  * @fh: Current Function Handle of the device to be created
  * @state: Initial state after creation either Standby or Configured
  *
- * Creates a new zpci device and adds it to its, possibly newly created, zbus
- * as well as zpci_list.
+ * Allocates a new struct zpci_dev and queries the platform for its details.
+ * If successful the device can subsequently be added to the zPCI subsystem
+ * using zpci_add_device().
  *
  * Returns: the zdev on success or an error pointer otherwise
  */
@@ -799,7 +806,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 	struct zpci_dev *zdev;
 	int rc;
 
-	zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
 	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
 	if (!zdev)
 		return ERR_PTR(-ENOMEM);
@@ -814,9 +820,35 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 		goto error;
 	zdev->state =  state;
 
-	kref_init(&zdev->kref);
-	mutex_init(&zdev->lock);
+	mutex_init(&zdev->state_lock);
+	mutex_init(&zdev->fmb_lock);
+	mutex_init(&zdev->kzdev_lock);
+
+	return zdev;
 
+error:
+	zpci_dbg(0, "crt fid:%x, rc:%d\n", fid, rc);
+	kfree(zdev);
+	return ERR_PTR(rc);
+}
+
+/**
+ * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem
+ * @zdev: The zPCI device to be added
+ *
+ * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus, creating
+ * a new one as necessary. A hotplug slot is created and events start to be handled.
+ * If successful, from this point on zpci_zdev_get() and zpci_zdev_put() must be used.
+ * If adding the struct zpci_dev fails, the device was not added and should be freed.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_add_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	mutex_lock(&zpci_add_remove_lock);
+	zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
 	rc = zpci_init_iommu(zdev);
 	if (rc)
 		goto error;
@@ -825,18 +857,19 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
 	if (rc)
 		goto error_destroy_iommu;
 
+	kref_init(&zdev->kref);
 	spin_lock(&zpci_list_lock);
 	list_add_tail(&zdev->entry, &zpci_list);
 	spin_unlock(&zpci_list_lock);
-
-	return zdev;
+	mutex_unlock(&zpci_add_remove_lock);
+	return 0;
 
 error_destroy_iommu:
 	zpci_destroy_iommu(zdev);
 error:
-	zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
-	kfree(zdev);
-	return ERR_PTR(rc);
+	zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
+	mutex_unlock(&zpci_add_remove_lock);
+	return rc;
 }
 
 bool zpci_is_device_configured(struct zpci_dev *zdev)
@@ -853,32 +886,15 @@ bool zpci_is_device_configured(struct zpci_dev *zdev)
  * @fh: The general function handle supplied by the platform
  *
  * Given a device in the configuration state Configured, enables, scans and
- * adds it to the common code PCI subsystem if possible. If the PCI device is
- * parked because we can not yet create a PCI bus because we have not seen
- * function 0, it is ignored but will be scanned once function 0 appears.
- * If any failure occurs, the zpci_dev is left disabled.
+ * adds it to the common code PCI subsystem if possible. If any failure occurs,
+ * the zpci_dev is left disabled.
  *
  * Return: 0 on success, or an error code otherwise
  */
 int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
 {
-	int rc;
-
 	zpci_update_fh(zdev, fh);
-	/* the PCI function will be scanned once function 0 appears */
-	if (!zdev->zbus->bus)
-		return 0;
-
-	/* For function 0 on a multi-function bus scan whole bus as we might
-	 * have to pick up existing functions waiting for it to allow creating
-	 * the PCI bus
-	 */
-	if (zdev->devfn == 0 && zdev->zbus->multifunction)
-		rc = zpci_bus_scan_bus(zdev->zbus);
-	else
-		rc = zpci_bus_scan_device(zdev);
-
-	return rc;
+	return zpci_bus_scan_device(zdev);
 }
 
 /**
@@ -895,14 +911,13 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
 {
 	int rc;
 
+	lockdep_assert_held(&zdev->state_lock);
+	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+		return 0;
+
 	if (zdev->zbus->bus)
 		zpci_bus_remove_device(zdev, false);
 
-	if (zdev->dma_table) {
-		rc = zpci_dma_exit_device(zdev);
-		if (rc)
-			return rc;
-	}
 	if (zdev_enabled(zdev)) {
 		rc = zpci_disable_device(zdev);
 		if (rc)
@@ -919,67 +934,50 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
 }
 
 /**
- * zpci_device_reserved() - Mark device as resverved
+ * zpci_device_reserved() - Mark device as reserved
  * @zdev: the zpci_dev that was reserved
  *
  * Handle the case that a given zPCI function was reserved by another system.
- * After a call to this function the zpci_dev can not be found via
- * get_zdev_by_fid() anymore but may still be accessible via existing
- * references though it will not be functional anymore.
  */
 void zpci_device_reserved(struct zpci_dev *zdev)
 {
-	if (zdev->has_hp_slot)
-		zpci_exit_slot(zdev);
-	/*
-	 * Remove device from zpci_list as it is going away. This also
-	 * makes sure we ignore subsequent zPCI events for this device.
-	 */
-	spin_lock(&zpci_list_lock);
-	list_del(&zdev->entry);
-	spin_unlock(&zpci_list_lock);
+	lockdep_assert_held(&zdev->state_lock);
+	/* We may declare the device reserved multiple times */
+	if (zdev->state == ZPCI_FN_STATE_RESERVED)
+		return;
 	zdev->state = ZPCI_FN_STATE_RESERVED;
 	zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+	/*
+	 * The underlying device is gone. Allow the zdev to be freed
+	 * as soon as all other references are gone by accounting for
+	 * the removal as a dropped reference.
+	 */
 	zpci_zdev_put(zdev);
 }
 
 void zpci_release_device(struct kref *kref)
 {
 	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
-	int ret;
 
-	if (zdev->zbus->bus)
-		zpci_bus_remove_device(zdev, false);
+	lockdep_assert_held(&zpci_add_remove_lock);
+	WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
+	/*
+	 * We already hold zpci_list_lock thanks to kref_put_lock().
+	 * This makes sure no new reference can be taken from the list.
+	 */
+	list_del(&zdev->entry);
+	spin_unlock(&zpci_list_lock);
 
-	if (zdev->dma_table)
-		zpci_dma_exit_device(zdev);
-	if (zdev_enabled(zdev))
-		zpci_disable_device(zdev);
+	if (zdev->has_hp_slot)
+		zpci_exit_slot(zdev);
 
-	switch (zdev->state) {
-	case ZPCI_FN_STATE_CONFIGURED:
-		ret = sclp_pci_deconfigure(zdev->fid);
-		zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
-		fallthrough;
-	case ZPCI_FN_STATE_STANDBY:
-		if (zdev->has_hp_slot)
-			zpci_exit_slot(zdev);
-		spin_lock(&zpci_list_lock);
-		list_del(&zdev->entry);
-		spin_unlock(&zpci_list_lock);
-		zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
-		fallthrough;
-	case ZPCI_FN_STATE_RESERVED:
-		if (zdev->has_resources)
-			zpci_cleanup_bus_resources(zdev);
-		zpci_bus_device_unregister(zdev);
-		zpci_destroy_iommu(zdev);
-		fallthrough;
-	default:
-		break;
-	}
+	if (zdev->has_resources)
+		zpci_cleanup_bus_resources(zdev);
+
+	zpci_bus_device_unregister(zdev);
+	zpci_destroy_iommu(zdev);
 	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
-	kfree(zdev);
+	kfree_rcu(zdev, rcu);
 }
 
 int zpci_report_error(struct pci_dev *pdev,
@@ -1094,7 +1092,7 @@ char * __init pcibios_setup(char *str)
 		return NULL;
 	}
 	if (!strcmp(str, "nomio")) {
-		S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+		clear_machine_feature(MFEATURE_PCI_MIO);
 		return NULL;
 	}
 	if (!strcmp(str, "force_floating")) {
@@ -1113,6 +1111,50 @@ bool zpci_is_enabled(void)
 	return s390_pci_initialized;
 }
 
+static int zpci_cmp_rid(void *priv, const struct list_head *a,
+			const struct list_head *b)
+{
+	struct zpci_dev *za = container_of(a, struct zpci_dev, entry);
+	struct zpci_dev *zb = container_of(b, struct zpci_dev, entry);
+
+	/*
+	 * PCI functions without RID available maintain original order
+	 * between themselves but sort before those with RID.
+	 */
+	if (za->rid == zb->rid)
+		return za->rid_available > zb->rid_available;
+	/*
+	 * PCI functions with RID sort by RID ascending.
+	 */
+	return za->rid > zb->rid;
+}
+
+static void zpci_add_devices(struct list_head *scan_list)
+{
+	struct zpci_dev *zdev, *tmp;
+
+	list_sort(NULL, scan_list, &zpci_cmp_rid);
+	list_for_each_entry_safe(zdev, tmp, scan_list, entry) {
+		list_del_init(&zdev->entry);
+		if (zpci_add_device(zdev))
+			kfree(zdev);
+	}
+}
+
+int zpci_scan_devices(void)
+{
+	LIST_HEAD(scan_list);
+	int rc;
+
+	rc = clp_scan_pci_devices(&scan_list);
+	if (rc)
+		return rc;
+
+	zpci_add_devices(&scan_list);
+	zpci_bus_scan_busses();
+	return 0;
+}
+
 static int __init pci_base_init(void)
 {
 	int rc;
@@ -1125,9 +1167,9 @@ static int __init pci_base_init(void)
 		return 0;
 	}
 
-	if (MACHINE_HAS_PCI_MIO) {
+	if (test_machine_feature(MFEATURE_PCI_MIO)) {
 		static_branch_enable(&have_mio);
-		ctl_set_bit(2, 5);
+		system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
 	}
 
 	rc = zpci_debug_init();
@@ -1142,21 +1184,14 @@ static int __init pci_base_init(void)
 	if (rc)
 		goto out_irq;
 
-	rc = zpci_dma_init();
-	if (rc)
-		goto out_dma;
-
-	rc = clp_scan_pci_devices();
+	rc = zpci_scan_devices();
 	if (rc)
 		goto out_find;
-	zpci_bus_scan_busses();
 
 	s390_pci_initialized = 1;
 	return 0;
 
 out_find:
-	zpci_dma_exit();
-out_dma:
 	zpci_irq_exit();
 out_irq:
 	zpci_mem_exit();
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 5d77acbd1c87..81bdb54ad5e3 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -19,6 +19,7 @@
 #include <linux/jump_label.h>
 #include <linux/pci.h>
 #include <linux/printk.h>
+#include <linux/dma-direct.h>
 
 #include <asm/pci_clp.h>
 #include <asm/pci_dma.h>
@@ -41,26 +42,19 @@ static int zpci_nb_devices;
  */
 static int zpci_bus_prepare_device(struct zpci_dev *zdev)
 {
-	struct resource_entry *window, *n;
-	struct resource *res;
-	int rc;
+	int rc, i;
 
 	if (!zdev_enabled(zdev)) {
 		rc = zpci_enable_device(zdev);
 		if (rc)
 			return rc;
-		rc = zpci_dma_init_device(zdev);
-		if (rc) {
-			zpci_disable_device(zdev);
-			return rc;
-		}
 	}
 
 	if (!zdev->has_resources) {
-		zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
-		resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
-			res = window->res;
-			pci_bus_add_resource(zdev->zbus->bus, res, 0);
+		zpci_setup_bus_resources(zdev);
+		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+			if (zdev->bars[i].res)
+				pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res);
 		}
 	}
 
@@ -87,9 +81,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev)
 	if (!pdev)
 		return -ENODEV;
 
-	pci_bus_add_device(pdev);
 	pci_lock_rescan_remove();
-	pci_bus_add_devices(zdev->zbus->bus);
+	pci_bus_add_device(pdev);
 	pci_unlock_rescan_remove();
 
 	return 0;
@@ -132,11 +125,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error)
  * @zbus: the zbus to be scanned
  *
  * Enables and scans all PCI functions on the bus making them available to the
- * common PCI code. If there is no function 0 on the zbus nothing is scanned. If
- * a function does not have a slot yet because it was added to the zbus before
- * function 0 the slot is created. If a PCI function fails to be initialized
- * an error will be returned but attempts will still be made for all other
- * functions on the bus.
+ * common PCI code. If a PCI function fails to be initialized an error will be
+ * returned but attempts will still be made for all other functions on the bus.
  *
  * Return: 0 on success, an error value otherwise
  */
@@ -145,9 +135,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
 	struct zpci_dev *zdev;
 	int devfn, rc, ret = 0;
 
-	if (!zbus->function[0])
-		return 0;
-
 	for (devfn = 0; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
 		zdev = zbus->function[devfn];
 		if (zdev && zdev->state == ZPCI_FN_STATE_CONFIGURED) {
@@ -182,28 +169,34 @@ void zpci_bus_scan_busses(void)
 	mutex_unlock(&zbus_list_lock);
 }
 
+static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
+{
+	return !s390_pci_no_rid && zdev->rid_available &&
+		!zdev->vfn;
+}
+
 /* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
  * @zbus: the zbus holding the zdevices
- * @f0: function 0 of the bus
+ * @fr: PCI root function that will determine the bus's domain and bus speed
  * @ops: the pci operations
  *
- * Function zero is taken as a parameter as this is used to determine the
- * domain, multifunction property and maximum bus speed of the entire bus.
+ * The PCI function @fr determines the domain (its UID), multifunction property
+ * and maximum bus speed of the entire bus.
  *
  * Return: 0 on success, an error code otherwise
  */
-static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *f0, struct pci_ops *ops)
+static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, struct pci_ops *ops)
 {
 	struct pci_bus *bus;
 	int domain;
 
-	domain = zpci_alloc_domain((u16)f0->uid);
+	domain = zpci_alloc_domain((u16)fr->uid);
 	if (domain < 0)
 		return domain;
 
 	zbus->domain_nr = domain;
-	zbus->multifunction = f0->rid_available;
-	zbus->max_bus_speed = f0->max_bus_speed;
+	zbus->multifunction = zpci_bus_is_multifunction_root(fr);
+	zbus->max_bus_speed = fr->max_bus_speed;
 
 	/*
 	 * Note that the zbus->resources are taken over and zbus->resources
@@ -216,7 +209,6 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *f0, s
 	}
 
 	zbus->bus = bus;
-	pci_bus_add_devices(bus);
 
 	return 0;
 }
@@ -247,13 +239,15 @@ static void zpci_bus_put(struct zpci_bus *zbus)
 	kref_put(&zbus->kref, zpci_bus_release);
 }
 
-static struct zpci_bus *zpci_bus_get(int pchid)
+static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
 {
 	struct zpci_bus *zbus;
 
 	mutex_lock(&zbus_list_lock);
 	list_for_each_entry(zbus, &zbus_list, bus_next) {
-		if (pchid == zbus->pchid) {
+		if (!zbus->multifunction)
+			continue;
+		if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
 			kref_get(&zbus->kref);
 			goto out_unlock;
 		}
@@ -264,7 +258,7 @@ out_unlock:
 	return zbus;
 }
 
-static struct zpci_bus *zpci_bus_alloc(int pchid)
+static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
 {
 	struct zpci_bus *zbus;
 
@@ -272,7 +266,8 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
 	if (!zbus)
 		return NULL;
 
-	zbus->pchid = pchid;
+	zbus->topo = topo;
+	zbus->topo_is_tid = topo_is_tid;
 	INIT_LIST_HEAD(&zbus->bus_next);
 	mutex_lock(&zbus_list_lock);
 	list_add_tail(&zbus->bus_next, &zbus_list);
@@ -289,10 +284,32 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
 	return zbus;
 }
 
+static void pci_dma_range_setup(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+	u64 aligned_end, size;
+	dma_addr_t dma_start;
+	int ret;
+
+	dma_start = PAGE_ALIGN(zdev->start_dma);
+	aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
+	if (aligned_end >= dma_start)
+		size = aligned_end - dma_start;
+	else
+		size = 0;
+	WARN_ON_ONCE(size == 0);
+
+	ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size);
+	if (ret)
+		pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev));
+}
+
 void pcibios_bus_add_device(struct pci_dev *pdev)
 {
 	struct zpci_dev *zdev = to_zpci(pdev);
 
+	pci_dma_range_setup(pdev);
+
 	/*
 	 * With pdev->no_vf_scan the common PCI probing code does not
 	 * perform PF/VF linking.
@@ -303,51 +320,18 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
 	}
 }
 
-/* zpci_bus_create_hotplug_slots - Add hotplug slot(s) for device added to bus
- * @zdev: the zPCI device that was newly added
- *
- * Add the hotplug slot(s) for the newly added PCI function. Normally this is
- * simply the slot for the function itself. If however we are adding the
- * function 0 on a zbus, it might be that we already registered functions on
- * that zbus but could not create their hotplug slots yet so add those now too.
- *
- * Return: 0 on success, an error code otherwise
- */
-static int zpci_bus_create_hotplug_slots(struct zpci_dev *zdev)
+static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
 {
-	struct zpci_bus *zbus = zdev->zbus;
-	int devfn, rc = 0;
-
-	rc = zpci_init_slot(zdev);
-	if (rc)
-		return rc;
-	zdev->has_hp_slot = 1;
+	int rc = -EINVAL;
 
-	if (zdev->devfn == 0 && zbus->multifunction) {
-		/* Now that function 0 is there we can finally create the
-		 * hotplug slots for those functions with devfn != 0 that have
-		 * been parked in zbus->function[] waiting for us to be able to
-		 * create the PCI bus.
-		 */
-		for  (devfn = 1; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
-			zdev = zbus->function[devfn];
-			if (zdev && !zdev->has_hp_slot) {
-				rc = zpci_init_slot(zdev);
-				if (rc)
-					return rc;
-				zdev->has_hp_slot = 1;
-			}
+	if (zbus->multifunction) {
+		if (!zdev->rid_available) {
+			WARN_ONCE(1, "rid_available not set for multifunction\n");
+			return rc;
 		}
-
+		zdev->devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
 	}
 
-	return rc;
-}
-
-static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
-{
-	int rc = -EINVAL;
-
 	if (zbus->function[zdev->devfn]) {
 		pr_err("devfn %04x is already assigned\n", zdev->devfn);
 		return rc;
@@ -356,17 +340,10 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
 	zbus->function[zdev->devfn] = zdev;
 	zpci_nb_devices++;
 
-	if (zbus->bus) {
-		if (zbus->multifunction && !zdev->rid_available) {
-			WARN_ONCE(1, "rid_available not set for multifunction\n");
-			goto error;
-		}
-
-		zpci_bus_create_hotplug_slots(zdev);
-	} else {
-		/* Hotplug slot will be created once function 0 appears */
-		zbus->multifunction = 1;
-	}
+	rc = zpci_init_slot(zdev);
+	if (rc)
+		goto error;
+	zdev->has_hp_slot = 1;
 
 	return 0;
 
@@ -377,10 +354,25 @@ error:
 	return rc;
 }
 
+static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+	struct pci_dev *pdev;
+
+	if (!zdev->vfn)
+		return false;
+
+	pdev = zpci_iov_find_parent_pf(zbus, zdev);
+	if (!pdev)
+		return true;
+	pci_dev_put(pdev);
+	return false;
+}
+
 int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
 {
+	bool topo_is_tid = zdev->tid_avail;
 	struct zpci_bus *zbus = NULL;
-	int rc = -EBADF;
+	int topo, rc = -EBADF;
 
 	if (zpci_nb_devices == ZPCI_NR_DEVICES) {
 		pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
@@ -388,19 +380,28 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
 		return -ENOSPC;
 	}
 
-	if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
-		return -EINVAL;
-
-	if (!s390_pci_no_rid && zdev->rid_available)
-		zbus = zpci_bus_get(zdev->pchid);
+	topo = topo_is_tid ? zdev->tid : zdev->pchid;
+	zbus = zpci_bus_get(topo, topo_is_tid);
+	/*
+	 * An isolated VF gets its own domain/bus even if there exists
+	 * a matching domain/bus already
+	 */
+	if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
+		zpci_bus_put(zbus);
+		zbus = NULL;
+	}
 
 	if (!zbus) {
-		zbus = zpci_bus_alloc(zdev->pchid);
+		zbus = zpci_bus_alloc(topo, topo_is_tid);
 		if (!zbus)
 			return -ENOMEM;
 	}
 
-	if (zdev->devfn == 0) {
+	if (!zbus->bus) {
+		/* The UID of the first PCI function registered with a zpci_bus
+		 * is used as the domain number for that bus. Currently there
+		 * is exactly one zpci_bus per domain.
+		 */
 		rc = zpci_bus_create_pci_bus(zbus, zdev, ops);
 		if (rc)
 			goto error;
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e96c9860e064..ae3d7a9159bd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -6,6 +6,10 @@
  *   Pierre Morel <pmorel@linux.ibm.com>
  *
  */
+#ifndef __S390_PCI_BUS_H
+#define __S390_PCI_BUS_H
+
+#include <linux/pci.h>
 
 int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
 void zpci_bus_device_unregister(struct zpci_dev *zdev);
@@ -17,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
 void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
 
 void zpci_release_device(struct kref *kref);
-static inline void zpci_zdev_put(struct zpci_dev *zdev)
-{
-	if (zdev)
-		kref_put(&zdev->kref, zpci_release_device);
-}
+
+void zpci_zdev_put(struct zpci_dev *zdev);
 
 static inline void zpci_zdev_get(struct zpci_dev *zdev)
 {
@@ -30,8 +31,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
 
 int zpci_alloc_domain(int domain);
 void zpci_free_domain(int domain);
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
-			     struct list_head *resources);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
 
 static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
 					     unsigned int devfn)
@@ -41,3 +41,4 @@ static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
 	return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
 }
 
+#endif /* __S390_PCI_BUS_H */
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 375e0a5120bc..241f7251c873 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -20,6 +20,7 @@
 #include <asm/asm-extable.h>
 #include <asm/pci_debug.h>
 #include <asm/pci_clp.h>
+#include <asm/asm.h>
 #include <asm/clp.h>
 #include <uapi/asm/clp.h>
 
@@ -52,18 +53,20 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
 static inline int clp_get_ilp(unsigned long *ilp)
 {
 	unsigned long mask;
-	int cc = 3;
+	int cc, exception;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"	.insn	rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
-		"0:	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
-		: "cc");
+		: CC_OUT(cc, cc), [mask] "=d" (mask), [exc] "+d" (exception)
+		: [cmd] "a" (1)
+		: CC_CLOBBER);
 	*ilp = mask;
-	return cc;
+	return exception ? 3 : CC_TRANSFORM(cc);
 }
 
 /*
@@ -72,19 +75,20 @@ static inline int clp_get_ilp(unsigned long *ilp)
 static __always_inline int clp_req(void *data, unsigned int lps)
 {
 	struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+	int cc, exception;
 	u64 ignored;
-	int cc = 3;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"	.insn	rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
-		"0:	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+		: CC_OUT(cc, cc), [ign] "=d" (ignored), "+m" (*req), [exc] "+d" (exception)
 		: [req] "a" (req), [lps] "i" (lps)
-		: "cc");
-	return cc;
+		: CC_CLOBBER);
+	return exception ? 3 : CC_TRANSFORM(cc);
 }
 
 static void *clp_alloc_block(gfp_t gfp_mask)
@@ -106,6 +110,9 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
 	zdev->max_msi = response->noi;
 	zdev->fmb_update = response->mui;
 	zdev->version = response->version;
+	zdev->maxstbl = response->maxstbl;
+	zdev->dtsm = response->dtsm;
+	zdev->rtr_avail = response->rtr;
 
 	switch (response->version) {
 	case 1:
@@ -160,12 +167,16 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
 	zdev->pft = response->pft;
 	zdev->vfn = response->vfn;
 	zdev->port = response->port;
+	zdev->fidparm = response->fidparm;
 	zdev->uid = response->uid;
 	zdev->fmb_length = sizeof(u32) * response->fmb_len;
-	zdev->rid_available = response->rid_avail;
 	zdev->is_physfn = response->is_physfn;
-	if (!s390_pci_no_rid && zdev->rid_available)
-		zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
+	zdev->rid_available = response->rid_avail;
+	if (zdev->rid_available)
+		zdev->rid = response->rid;
+	zdev->tid_avail = response->tid_avail;
+	if (zdev->tid_avail)
+		zdev->tid = response->tid;
 
 	memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
 	if (response->util_str_avail) {
@@ -229,12 +240,16 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
 {
 	struct clp_req_rsp_set_pci *rrb;
 	int rc, retries = 100;
+	u32 gisa = 0;
 
 	*fh = 0;
 	rrb = clp_alloc_block(GFP_KERNEL);
 	if (!rrb)
 		return -ENOMEM;
 
+	if (command != CLP_SET_DISABLE_PCI_FN)
+		gisa = zdev->gisa;
+
 	do {
 		memset(rrb, 0, sizeof(*rrb));
 		rrb->request.hdr.len = sizeof(rrb->request);
@@ -243,6 +258,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
 		rrb->request.fh = zdev->fh;
 		rrb->request.oc = command;
 		rrb->request.ndas = nr_dma_as;
+		rrb->request.gisa = gisa;
 
 		rc = clp_req(rrb, CLP_LPS_PCI);
 		if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
@@ -400,6 +416,7 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
 
 static void __clp_add(struct clp_fh_list_entry *entry, void *data)
 {
+	struct list_head *scan_list = data;
 	struct zpci_dev *zdev;
 
 	if (!entry->vendor_id)
@@ -410,10 +427,13 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
 		zpci_zdev_put(zdev);
 		return;
 	}
-	zpci_create_device(entry->fid, entry->fh, entry->config_state);
+	zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state);
+	if (IS_ERR(zdev))
+		return;
+	list_add_tail(&zdev->entry, scan_list);
 }
 
-int clp_scan_pci_devices(void)
+int clp_scan_pci_devices(struct list_head *scan_list)
 {
 	struct clp_req_rsp_list_pci *rrb;
 	int rc;
@@ -422,7 +442,7 @@ int clp_scan_pci_devices(void)
 	if (!rrb)
 		return -ENOMEM;
 
-	rc = clp_list_pci(rrb, NULL, __clp_add);
+	rc = clp_list_pci(rrb, scan_list, __clp_add);
 
 	clp_free_block(rrb);
 	return rc;
@@ -650,7 +670,6 @@ static const struct file_operations clp_misc_fops = {
 	.release = clp_misc_release,
 	.unlocked_ioctl = clp_misc_ioctl,
 	.compat_ioctl = clp_misc_ioctl,
-	.llseek = no_llseek,
 };
 
 static struct miscdevice clp_misc_device = {
@@ -659,9 +678,4 @@ static struct miscdevice clp_misc_device = {
 	.fops = &clp_misc_fops,
 };
 
-static int __init clp_misc_init(void)
-{
-	return misc_register(&clp_misc_device);
-}
-
-device_initcall(clp_misc_init);
+builtin_misc_device(clp_misc_device);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index ca6bd98eec13..38014206c16b 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = {
 };
 
 static char *pci_sw_names[] = {
-	"Allocated pages",
 	"Mapped pages",
 	"Unmapped pages",
+	"Global RPCITs",
+	"Sync Map RPCITs",
+	"Sync RPCITs",
 };
 
 static void pci_fmb_show(struct seq_file *m, char *name[], int length,
@@ -70,12 +72,22 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
 static void pci_sw_counter_show(struct seq_file *m)
 {
 	struct zpci_dev *zdev = m->private;
-	atomic64_t *counter = &zdev->allocated_pages;
+	struct zpci_iommu_ctrs *ctrs;
+	atomic64_t *counter;
+	unsigned long flags;
 	int i;
 
+	spin_lock_irqsave(&zdev->dom_lock, flags);
+	ctrs = zpci_get_iommu_ctrs(m->private);
+	if (!ctrs)
+		goto unlock;
+
+	counter = &ctrs->mapped_pages;
 	for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
 		seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
 			   atomic64_read(counter));
+unlock:
+	spin_unlock_irqrestore(&zdev->dom_lock, flags);
 }
 
 static int pci_perf_show(struct seq_file *m, void *v)
@@ -85,9 +97,9 @@ static int pci_perf_show(struct seq_file *m, void *v)
 	if (!zdev)
 		return 0;
 
-	mutex_lock(&zdev->lock);
+	mutex_lock(&zdev->fmb_lock);
 	if (!zdev->fmb) {
-		mutex_unlock(&zdev->lock);
+		mutex_unlock(&zdev->fmb_lock);
 		seq_puts(m, "FMB statistics disabled\n");
 		return 0;
 	}
@@ -124,7 +136,7 @@ static int pci_perf_show(struct seq_file *m, void *v)
 	}
 
 	pci_sw_counter_show(m);
-	mutex_unlock(&zdev->lock);
+	mutex_unlock(&zdev->fmb_lock);
 	return 0;
 }
 
@@ -142,7 +154,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
 	if (rc)
 		return rc;
 
-	mutex_lock(&zdev->lock);
+	mutex_lock(&zdev->fmb_lock);
 	switch (val) {
 	case 0:
 		rc = zpci_fmb_disable_device(zdev);
@@ -151,7 +163,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
 		rc = zpci_fmb_enable_device(zdev);
 		break;
 	}
-	mutex_unlock(&zdev->lock);
+	mutex_unlock(&zdev->fmb_lock);
 	return rc ? rc : count;
 }
 
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
deleted file mode 100644
index f46833a25526..000000000000
--- a/arch/s390/pci/pci_dma.c
+++ /dev/null
@@ -1,715 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- *   Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/iommu-helper.h>
-#include <linux/dma-map-ops.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <asm/pci_dma.h>
-
-static struct kmem_cache *dma_region_table_cache;
-static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
-static u64 s390_iommu_aperture;
-static u32 s390_iommu_aperture_factor = 1;
-
-static int zpci_refresh_global(struct zpci_dev *zdev)
-{
-	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
-				  zdev->iommu_pages * PAGE_SIZE);
-}
-
-unsigned long *dma_alloc_cpu_table(void)
-{
-	unsigned long *table, *entry;
-
-	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
-	if (!table)
-		return NULL;
-
-	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
-		*entry = ZPCI_TABLE_INVALID;
-	return table;
-}
-
-static void dma_free_cpu_table(void *table)
-{
-	kmem_cache_free(dma_region_table_cache, table);
-}
-
-static unsigned long *dma_alloc_page_table(void)
-{
-	unsigned long *table, *entry;
-
-	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
-	if (!table)
-		return NULL;
-
-	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
-		*entry = ZPCI_PTE_INVALID;
-	return table;
-}
-
-static void dma_free_page_table(void *table)
-{
-	kmem_cache_free(dma_page_table_cache, table);
-}
-
-static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
-{
-	unsigned long *sto;
-
-	if (reg_entry_isvalid(*entry))
-		sto = get_rt_sto(*entry);
-	else {
-		sto = dma_alloc_cpu_table();
-		if (!sto)
-			return NULL;
-
-		set_rt_sto(entry, virt_to_phys(sto));
-		validate_rt_entry(entry);
-		entry_clr_protected(entry);
-	}
-	return sto;
-}
-
-static unsigned long *dma_get_page_table_origin(unsigned long *entry)
-{
-	unsigned long *pto;
-
-	if (reg_entry_isvalid(*entry))
-		pto = get_st_pto(*entry);
-	else {
-		pto = dma_alloc_page_table();
-		if (!pto)
-			return NULL;
-		set_st_pto(entry, virt_to_phys(pto));
-		validate_st_entry(entry);
-		entry_clr_protected(entry);
-	}
-	return pto;
-}
-
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
-{
-	unsigned long *sto, *pto;
-	unsigned int rtx, sx, px;
-
-	rtx = calc_rtx(dma_addr);
-	sto = dma_get_seg_table_origin(&rto[rtx]);
-	if (!sto)
-		return NULL;
-
-	sx = calc_sx(dma_addr);
-	pto = dma_get_page_table_origin(&sto[sx]);
-	if (!pto)
-		return NULL;
-
-	px = calc_px(dma_addr);
-	return &pto[px];
-}
-
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
-{
-	if (flags & ZPCI_PTE_INVALID) {
-		invalidate_pt_entry(entry);
-	} else {
-		set_pt_pfaa(entry, page_addr);
-		validate_pt_entry(entry);
-	}
-
-	if (flags & ZPCI_TABLE_PROTECTED)
-		entry_set_protected(entry);
-	else
-		entry_clr_protected(entry);
-}
-
-static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
-			      dma_addr_t dma_addr, size_t size, int flags)
-{
-	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	phys_addr_t page_addr = (pa & PAGE_MASK);
-	unsigned long irq_flags;
-	unsigned long *entry;
-	int i, rc = 0;
-
-	if (!nr_pages)
-		return -EINVAL;
-
-	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
-	if (!zdev->dma_table) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
-
-	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
-		if (!entry) {
-			rc = -ENOMEM;
-			goto undo_cpu_trans;
-		}
-		dma_update_cpu_trans(entry, page_addr, flags);
-		page_addr += PAGE_SIZE;
-		dma_addr += PAGE_SIZE;
-	}
-
-undo_cpu_trans:
-	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
-		flags = ZPCI_PTE_INVALID;
-		while (i-- > 0) {
-			page_addr -= PAGE_SIZE;
-			dma_addr -= PAGE_SIZE;
-			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
-			if (!entry)
-				break;
-			dma_update_cpu_trans(entry, page_addr, flags);
-		}
-	}
-out_unlock:
-	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
-	return rc;
-}
-
-static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
-			   size_t size, int flags)
-{
-	unsigned long irqflags;
-	int ret;
-
-	/*
-	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
-	 * translations when previously invalid translation-table entries are
-	 * validated. With lazy unmap, rpcit is skipped for previously valid
-	 * entries, but a global rpcit is then required before any address can
-	 * be re-used, i.e. after each iommu bitmap wrap-around.
-	 */
-	if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
-		if (!zdev->tlb_refresh)
-			return 0;
-	} else {
-		if (!s390_iommu_strict)
-			return 0;
-	}
-
-	ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
-				 PAGE_ALIGN(size));
-	if (ret == -ENOMEM && !s390_iommu_strict) {
-		/* enable the hypervisor to free some resources */
-		if (zpci_refresh_global(zdev))
-			goto out;
-
-		spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
-		bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
-			      zdev->lazy_bitmap, zdev->iommu_pages);
-		bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
-		spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
-		ret = 0;
-	}
-out:
-	return ret;
-}
-
-static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
-			    dma_addr_t dma_addr, size_t size, int flags)
-{
-	int rc;
-
-	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
-	if (rc)
-		return rc;
-
-	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
-	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
-		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
-
-	return rc;
-}
-
-void dma_free_seg_table(unsigned long entry)
-{
-	unsigned long *sto = get_rt_sto(entry);
-	int sx;
-
-	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
-		if (reg_entry_isvalid(sto[sx]))
-			dma_free_page_table(get_st_pto(sto[sx]));
-
-	dma_free_cpu_table(sto);
-}
-
-void dma_cleanup_tables(unsigned long *table)
-{
-	int rtx;
-
-	if (!table)
-		return;
-
-	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
-		if (reg_entry_isvalid(table[rtx]))
-			dma_free_seg_table(table[rtx]);
-
-	dma_free_cpu_table(table);
-}
-
-static unsigned long __dma_alloc_iommu(struct device *dev,
-				       unsigned long start, int size)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
-	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
-				start, size, zdev->start_dma >> PAGE_SHIFT,
-				dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
-				0);
-}
-
-static dma_addr_t dma_alloc_address(struct device *dev, int size)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long offset, flags;
-
-	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
-	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
-	if (offset == -1) {
-		if (!s390_iommu_strict) {
-			/* global flush before DMA addresses are reused */
-			if (zpci_refresh_global(zdev))
-				goto out_error;
-
-			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
-				      zdev->lazy_bitmap, zdev->iommu_pages);
-			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
-		}
-		/* wrap-around */
-		offset = __dma_alloc_iommu(dev, 0, size);
-		if (offset == -1)
-			goto out_error;
-	}
-	zdev->next_bit = offset + size;
-	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-
-	return zdev->start_dma + offset * PAGE_SIZE;
-
-out_error:
-	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-	return DMA_MAPPING_ERROR;
-}
-
-static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long flags, offset;
-
-	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-
-	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
-	if (!zdev->iommu_bitmap)
-		goto out;
-
-	if (s390_iommu_strict)
-		bitmap_clear(zdev->iommu_bitmap, offset, size);
-	else
-		bitmap_set(zdev->lazy_bitmap, offset, size);
-
-out:
-	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-}
-
-static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
-{
-	struct {
-		unsigned long rc;
-		unsigned long addr;
-	} __packed data = {rc, addr};
-
-	zpci_err_hex(&data, sizeof(data));
-}
-
-static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
-				     unsigned long offset, size_t size,
-				     enum dma_data_direction direction,
-				     unsigned long attrs)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long pa = page_to_phys(page) + offset;
-	int flags = ZPCI_PTE_VALID;
-	unsigned long nr_pages;
-	dma_addr_t dma_addr;
-	int ret;
-
-	/* This rounds up number of pages based on size and offset */
-	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
-	dma_addr = dma_alloc_address(dev, nr_pages);
-	if (dma_addr == DMA_MAPPING_ERROR) {
-		ret = -ENOSPC;
-		goto out_err;
-	}
-
-	/* Use rounded up size */
-	size = nr_pages * PAGE_SIZE;
-
-	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
-		flags |= ZPCI_TABLE_PROTECTED;
-
-	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
-	if (ret)
-		goto out_free;
-
-	atomic64_add(nr_pages, &zdev->mapped_pages);
-	return dma_addr + (offset & ~PAGE_MASK);
-
-out_free:
-	dma_free_address(dev, dma_addr, nr_pages);
-out_err:
-	zpci_err("map error:\n");
-	zpci_err_dma(ret, pa);
-	return DMA_MAPPING_ERROR;
-}
-
-static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
-				 size_t size, enum dma_data_direction direction,
-				 unsigned long attrs)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	int npages, ret;
-
-	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
-	dma_addr = dma_addr & PAGE_MASK;
-	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
-			       ZPCI_PTE_INVALID);
-	if (ret) {
-		zpci_err("unmap error:\n");
-		zpci_err_dma(ret, dma_addr);
-		return;
-	}
-
-	atomic64_add(npages, &zdev->unmapped_pages);
-	dma_free_address(dev, dma_addr, npages);
-}
-
-static void *s390_dma_alloc(struct device *dev, size_t size,
-			    dma_addr_t *dma_handle, gfp_t flag,
-			    unsigned long attrs)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	struct page *page;
-	phys_addr_t pa;
-	dma_addr_t map;
-
-	size = PAGE_ALIGN(size);
-	page = alloc_pages(flag | __GFP_ZERO, get_order(size));
-	if (!page)
-		return NULL;
-
-	pa = page_to_phys(page);
-	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
-	if (dma_mapping_error(dev, map)) {
-		__free_pages(page, get_order(size));
-		return NULL;
-	}
-
-	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
-	if (dma_handle)
-		*dma_handle = map;
-	return phys_to_virt(pa);
-}
-
-static void s390_dma_free(struct device *dev, size_t size,
-			  void *vaddr, dma_addr_t dma_handle,
-			  unsigned long attrs)
-{
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
-	size = PAGE_ALIGN(size);
-	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
-	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
-	free_pages((unsigned long)vaddr, get_order(size));
-}
-
-/* Map a segment into a contiguous dma address area */
-static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
-			     size_t size, dma_addr_t *handle,
-			     enum dma_data_direction dir)
-{
-	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	dma_addr_t dma_addr_base, dma_addr;
-	int flags = ZPCI_PTE_VALID;
-	struct scatterlist *s;
-	phys_addr_t pa = 0;
-	int ret;
-
-	dma_addr_base = dma_alloc_address(dev, nr_pages);
-	if (dma_addr_base == DMA_MAPPING_ERROR)
-		return -ENOMEM;
-
-	dma_addr = dma_addr_base;
-	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
-		flags |= ZPCI_TABLE_PROTECTED;
-
-	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
-		pa = page_to_phys(sg_page(s));
-		ret = __dma_update_trans(zdev, pa, dma_addr,
-					 s->offset + s->length, flags);
-		if (ret)
-			goto unmap;
-
-		dma_addr += s->offset + s->length;
-	}
-	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
-	if (ret)
-		goto unmap;
-
-	*handle = dma_addr_base;
-	atomic64_add(nr_pages, &zdev->mapped_pages);
-
-	return ret;
-
-unmap:
-	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
-			 ZPCI_PTE_INVALID);
-	dma_free_address(dev, dma_addr_base, nr_pages);
-	zpci_err("map error:\n");
-	zpci_err_dma(ret, pa);
-	return ret;
-}
-
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
-			   int nr_elements, enum dma_data_direction dir,
-			   unsigned long attrs)
-{
-	struct scatterlist *s = sg, *start = sg, *dma = sg;
-	unsigned int max = dma_get_max_seg_size(dev);
-	unsigned int size = s->offset + s->length;
-	unsigned int offset = s->offset;
-	int count = 0, i, ret;
-
-	for (i = 1; i < nr_elements; i++) {
-		s = sg_next(s);
-
-		s->dma_length = 0;
-
-		if (s->offset || (size & ~PAGE_MASK) ||
-		    size + s->length > max) {
-			ret = __s390_dma_map_sg(dev, start, size,
-						&dma->dma_address, dir);
-			if (ret)
-				goto unmap;
-
-			dma->dma_address += offset;
-			dma->dma_length = size - offset;
-
-			size = offset = s->offset;
-			start = s;
-			dma = sg_next(dma);
-			count++;
-		}
-		size += s->length;
-	}
-	ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
-	if (ret)
-		goto unmap;
-
-	dma->dma_address += offset;
-	dma->dma_length = size - offset;
-
-	return count + 1;
-unmap:
-	for_each_sg(sg, s, count, i)
-		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
-				     dir, attrs);
-
-	return ret;
-}
-
-static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-			      int nr_elements, enum dma_data_direction dir,
-			      unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nr_elements, i) {
-		if (s->dma_length)
-			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
-					     dir, attrs);
-		s->dma_address = 0;
-		s->dma_length = 0;
-	}
-}
-	
-int zpci_dma_init_device(struct zpci_dev *zdev)
-{
-	int rc;
-
-	/*
-	 * At this point, if the device is part of an IOMMU domain, this would
-	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
-	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
-	 */
-	WARN_ON(zdev->s390_domain);
-
-	spin_lock_init(&zdev->iommu_bitmap_lock);
-	spin_lock_init(&zdev->dma_table_lock);
-
-	zdev->dma_table = dma_alloc_cpu_table();
-	if (!zdev->dma_table) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	/*
-	 * Restrict the iommu bitmap size to the minimum of the following:
-	 * - s390_iommu_aperture which defaults to high_memory
-	 * - 3-level pagetable address limit minus start_dma offset
-	 * - DMA address range allowed by the hardware (clp query pci fn)
-	 *
-	 * Also set zdev->end_dma to the actual end address of the usable
-	 * range, instead of the theoretical maximum as reported by hardware.
-	 *
-	 * This limits the number of concurrently usable DMA mappings since
-	 * for each DMA mapped memory address we need a DMA address including
-	 * extra DMA addresses for multiple mappings of the same memory address.
-	 */
-	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
-	zdev->iommu_size = min3(s390_iommu_aperture,
-				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
-				zdev->end_dma - zdev->start_dma + 1);
-	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
-	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
-	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
-	if (!zdev->iommu_bitmap) {
-		rc = -ENOMEM;
-		goto free_dma_table;
-	}
-	if (!s390_iommu_strict) {
-		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
-		if (!zdev->lazy_bitmap) {
-			rc = -ENOMEM;
-			goto free_bitmap;
-		}
-
-	}
-	if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-			       virt_to_phys(zdev->dma_table))) {
-		rc = -EIO;
-		goto free_bitmap;
-	}
-
-	return 0;
-free_bitmap:
-	vfree(zdev->iommu_bitmap);
-	zdev->iommu_bitmap = NULL;
-	vfree(zdev->lazy_bitmap);
-	zdev->lazy_bitmap = NULL;
-free_dma_table:
-	dma_free_cpu_table(zdev->dma_table);
-	zdev->dma_table = NULL;
-out:
-	return rc;
-}
-
-int zpci_dma_exit_device(struct zpci_dev *zdev)
-{
-	int cc = 0;
-
-	/*
-	 * At this point, if the device is part of an IOMMU domain, this would
-	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
-	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
-	 */
-	WARN_ON(zdev->s390_domain);
-	if (zdev_enabled(zdev))
-		cc = zpci_unregister_ioat(zdev, 0);
-	/*
-	 * cc == 3 indicates the function is gone already. This can happen
-	 * if the function was deconfigured/disabled suddenly and we have not
-	 * received a new handle yet.
-	 */
-	if (cc && cc != 3)
-		return -EIO;
-
-	dma_cleanup_tables(zdev->dma_table);
-	zdev->dma_table = NULL;
-	vfree(zdev->iommu_bitmap);
-	zdev->iommu_bitmap = NULL;
-	vfree(zdev->lazy_bitmap);
-	zdev->lazy_bitmap = NULL;
-	zdev->next_bit = 0;
-	return 0;
-}
-
-static int __init dma_alloc_cpu_table_caches(void)
-{
-	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
-					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
-					0, NULL);
-	if (!dma_region_table_cache)
-		return -ENOMEM;
-
-	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
-					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
-					0, NULL);
-	if (!dma_page_table_cache) {
-		kmem_cache_destroy(dma_region_table_cache);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-int __init zpci_dma_init(void)
-{
-	s390_iommu_aperture = (u64)high_memory;
-	if (!s390_iommu_aperture_factor)
-		s390_iommu_aperture = ULONG_MAX;
-	else
-		s390_iommu_aperture *= s390_iommu_aperture_factor;
-
-	return dma_alloc_cpu_table_caches();
-}
-
-void zpci_dma_exit(void)
-{
-	kmem_cache_destroy(dma_page_table_cache);
-	kmem_cache_destroy(dma_region_table_cache);
-}
-
-const struct dma_map_ops s390_pci_dma_ops = {
-	.alloc		= s390_dma_alloc,
-	.free		= s390_dma_free,
-	.map_sg		= s390_dma_map_sg,
-	.unmap_sg	= s390_dma_unmap_sg,
-	.map_page	= s390_dma_map_pages,
-	.unmap_page	= s390_dma_unmap_pages,
-	.mmap		= dma_common_mmap,
-	.get_sgtable	= dma_common_get_sgtable,
-	.alloc_pages	= dma_common_alloc_pages,
-	.free_pages	= dma_common_free_pages,
-	/* dma_supported is unconditionally true without a callback */
-};
-EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-
-static int __init s390_iommu_setup(char *str)
-{
-	if (!strcmp(str, "strict"))
-		s390_iommu_strict = 1;
-	return 1;
-}
-
-__setup("s390_iommu=", s390_iommu_setup);
-
-static int __init s390_iommu_aperture_setup(char *str)
-{
-	if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
-		s390_iommu_aperture_factor = 1;
-	return 1;
-}
-
-__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b9324ca2eb94..2fbee3887d13 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -16,6 +16,7 @@
 #include <asm/sclp.h>
 
 #include "pci_bus.h"
+#include "pci_report.h"
 
 /* Content Code Description for PCI Function Error */
 struct zpci_ccdf_err {
@@ -59,9 +60,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
 	}
 }
 
-static bool is_passed_through(struct zpci_dev *zdev)
+static bool is_passed_through(struct pci_dev *pdev)
 {
-	return zdev->s390_domain;
+	struct zpci_dev *zdev = to_zpci(pdev);
+	bool ret;
+
+	mutex_lock(&zdev->kzdev_lock);
+	ret = !!zdev->kzdev;
+	mutex_unlock(&zdev->kzdev_lock);
+
+	return ret;
 }
 
 static bool is_driver_supported(struct pci_driver *driver)
@@ -162,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 {
 	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+	struct zpci_dev *zdev = to_zpci(pdev);
+	char *status_str = "success";
 	struct pci_driver *driver;
 
 	/*
@@ -176,32 +186,40 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 	}
 	pdev->error_state = pci_channel_io_frozen;
 
-	if (is_passed_through(to_zpci(pdev))) {
+	if (is_passed_through(pdev)) {
 		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
 			pci_name(pdev));
+		status_str = "failed (pass-through)";
 		goto out_unlock;
 	}
 
 	driver = to_pci_driver(pdev->dev.driver);
 	if (!is_driver_supported(driver)) {
-		if (!driver)
+		if (!driver) {
 			pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
 				pci_name(pdev));
-		else
+			status_str = "failed (no driver)";
+		} else {
 			pr_info("%s: The %s driver bound to the device does not support error recovery\n",
 				pci_name(pdev),
 				driver->name);
+			status_str = "failed (no driver support)";
+		}
 		goto out_unlock;
 	}
 
 	ers_res = zpci_event_notify_error_detected(pdev, driver);
-	if (ers_result_indicates_abort(ers_res))
+	if (ers_result_indicates_abort(ers_res)) {
+		status_str = "failed (abort on detection)";
 		goto out_unlock;
+	}
 
 	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
 		ers_res = zpci_event_do_error_state_clear(pdev, driver);
-		if (ers_result_indicates_abort(ers_res))
+		if (ers_result_indicates_abort(ers_res)) {
+			status_str = "failed (abort on MMIO enable)";
 			goto out_unlock;
+		}
 	}
 
 	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -210,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 	if (ers_res != PCI_ERS_RESULT_RECOVERED) {
 		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
 		       pci_name(pdev));
+		status_str = "failed (driver can't recover)";
 		goto out_unlock;
 	}
 
@@ -218,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
 		driver->err_handler->resume(pdev);
 out_unlock:
 	pci_dev_unlock(pdev);
+	zpci_report_status(zdev, "recovery", status_str);
 
 	return ers_res;
 }
@@ -239,7 +259,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
 	 * we will inject the error event and let the guest recover the device
 	 * itself.
 	 */
-	if (is_passed_through(to_zpci(pdev)))
+	if (is_passed_through(pdev))
 		goto out;
 	driver = to_pci_driver(pdev->dev.driver);
 	if (driver && driver->err_handler && driver->err_handler->error_detected)
@@ -260,6 +280,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
 	if (zdev) {
+		mutex_lock(&zdev->state_lock);
 		zpci_update_fh(zdev, ccdf->fh);
 		if (zdev->zbus->bus)
 			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -272,21 +293,24 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 		goto no_pdev;
 
 	switch (ccdf->pec) {
-	case 0x003a: /* Service Action or Error Recovery Successful */
+	case 0x002a: /* Error event concerns FMB */
+	case 0x002b:
+	case 0x002c:
+		break;
+	case 0x0040: /* Service Action or Error Recovery Failed */
+	case 0x003b:
+		zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+		break;
+	default: /* PCI function left in the error state, attempt to recover */
 		ers_res = zpci_event_attempt_error_recovery(pdev);
 		if (ers_res != PCI_ERS_RESULT_RECOVERED)
 			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
 		break;
-	default:
-		/*
-		 * Mark as frozen not permanently failed because the device
-		 * could be subsequently recovered by the platform.
-		 */
-		zpci_event_io_failure(pdev, pci_channel_io_frozen);
-		break;
 	}
 	pci_dev_put(pdev);
 no_pdev:
+	if (zdev)
+		mutex_unlock(&zdev->state_lock);
 	zpci_zdev_put(zdev);
 }
 
@@ -306,13 +330,27 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
 	/* Even though the device is already gone we still
 	 * need to free zPCI resources as part of the disable.
 	 */
-	if (zdev->dma_table)
-		zpci_dma_exit_device(zdev);
 	if (zdev_enabled(zdev))
 		zpci_disable_device(zdev);
 	zdev->state = ZPCI_FN_STATE_STANDBY;
 }
 
+static void zpci_event_reappear(struct zpci_dev *zdev)
+{
+	lockdep_assert_held(&zdev->state_lock);
+	/*
+	 * The zdev is in the reserved state: it was presumed to go away,
+	 * but some references to it have not been dropped yet. Now the
+	 * platform has announced its availability again, so bring the
+	 * lingering zdev back to standby. This is safe because we hold a
+	 * temporary reference now so that it won't go away. Account for the
+	 * re-appearance of the underlying device by taking a reference.
+	 */
+	zdev->state = ZPCI_FN_STATE_STANDBY;
+	zpci_zdev_get(zdev);
+	zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+}
+
 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -321,29 +359,48 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 
 	zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
 		 ccdf->fid, ccdf->fh, ccdf->pec);
+
+	if (existing_zdev)
+		mutex_lock(&zdev->state_lock);
+
 	switch (ccdf->pec) {
 	case 0x0301: /* Reserved|Standby -> Configured */
 		if (!zdev) {
 			zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
 			if (IS_ERR(zdev))
 				break;
+			if (zpci_add_device(zdev)) {
+				kfree(zdev);
+				break;
+			}
 		} else {
+			if (zdev->state == ZPCI_FN_STATE_RESERVED)
+				zpci_event_reappear(zdev);
 			/* the configuration request may be stale */
-			if (zdev->state != ZPCI_FN_STATE_STANDBY)
+			else if (zdev->state != ZPCI_FN_STATE_STANDBY)
 				break;
 			zdev->state = ZPCI_FN_STATE_CONFIGURED;
 		}
 		zpci_scan_configured_device(zdev, ccdf->fh);
 		break;
 	case 0x0302: /* Reserved -> Standby */
-		if (!zdev)
-			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
-		else
+		if (!zdev) {
+			zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+			if (IS_ERR(zdev))
+				break;
+			if (zpci_add_device(zdev)) {
+				kfree(zdev);
+				break;
+			}
+		} else {
+			if (zdev->state == ZPCI_FN_STATE_RESERVED)
+				zpci_event_reappear(zdev);
 			zpci_update_fh(zdev, ccdf->fh);
+		}
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (zdev) {
-			/* The event may have been queued before we confirgured
+			/* The event may have been queued before we configured
 			 * the device.
 			 */
 			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
@@ -354,7 +411,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		break;
 	case 0x0304: /* Configured -> Standby|Reserved */
 		if (zdev) {
-			/* The event may have been queued before we confirgured
+			/* The event may have been queued before we configured
 			 * the device.:
 			 */
 			if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
@@ -368,7 +425,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		break;
 	case 0x0306: /* 0x308 or 0x302 for multiple devices */
 		zpci_remove_reserved_devices();
-		clp_scan_pci_devices();
+		zpci_scan_devices();
 		break;
 	case 0x0308: /* Standby -> Reserved */
 		if (!zdev)
@@ -378,8 +435,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 	default:
 		break;
 	}
-	if (existing_zdev)
+	if (existing_zdev) {
+		mutex_unlock(&zdev->state_lock);
 		zpci_zdev_put(zdev);
+	}
 }
 
 void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c
new file mode 100644
index 000000000000..35688b645098
--- /dev/null
+++ b/arch/s390/pci/pci_fixup.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exceptions for specific devices.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ */
+#include <linux/pci.h>
+
+static void zpci_ism_bar_no_mmap(struct pci_dev *pdev)
+{
+	/*
+	 * ISM's BAR is special. Drivers written for ISM know
+	 * how to handle this but others need to be aware of its
+	 * special nature e.g. to prevent attempts to mmap() it.
+	 */
+	pdev->non_mappable_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM,
+			PCI_DEVICE_ID_IBM_ISM,
+			zpci_ism_bar_no_mmap);
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 1a822b7799f8..eb978c8012be 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -15,6 +15,7 @@
 #include <asm/pci_debug.h>
 #include <asm/pci_io.h>
 #include <asm/processor.h>
+#include <asm/asm.h>
 
 #define ZPCI_INSN_BUSY_DELAY	1	/* 1 microsecond */
 
@@ -57,16 +58,16 @@ static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
 /* Modify PCI Function Controls */
 static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 {
-	u8 cc;
+	int cc;
 
 	asm volatile (
 		"	.insn	rxy,0xe300000000d0,%[req],%[fib]\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
-		: : "cc");
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [req] "+d" (req), [fib] "+Q" (*fib)
+		:
+		: CC_CLOBBER);
 	*status = req >> 24 & 0xff;
-	return cc;
+	return CC_TRANSFORM(cc);
 }
 
 u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
@@ -92,22 +93,22 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
 
 	return cc;
 }
+EXPORT_SYMBOL_GPL(zpci_mod_fc);
 
 /* Refresh PCI Translations */
 static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
 {
 	union register_pair addr_range = {.even = addr, .odd = range};
-	u8 cc;
+	int cc;
 
 	asm volatile (
 		"	.insn	rre,0xb9d30000,%[fn],%[addr_range]\n"
-		"	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
-		: [cc] "=d" (cc), [fn] "+d" (fn)
+		CC_IPM(cc)
+		: CC_OUT(cc, cc), [fn] "+d" (fn)
 		: [addr_range] "d" (addr_range.pair)
-		: "cc");
+		: CC_CLOBBER);
 	*status = fn >> 24 & 0xff;
-	return cc;
+	return CC_TRANSFORM(cc);
 }
 
 int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
@@ -138,7 +139,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 }
 
 /* Set Interruption Controls */
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
 {
 	if (!test_facility(72))
 		return -EIO;
@@ -149,25 +150,29 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
 
 /* PCI Load */
 static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
 {
 	union register_pair req_off = {.even = req, .odd = offset};
-	int cc = -ENXIO;
+	int cc, exception;
 	u64 __data;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"	.insn	rre,0xb9d20000,%[data],%[req_off]\n"
-		"0:	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [data] "=d" (__data),
-		  [req_off] "+&d" (req_off.pair) :: "cc");
+		: CC_OUT(cc, cc), [data] "=d" (__data),
+		  [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
+		:
+		: CC_CLOBBER);
 	*status = req_off.even >> 24 & 0xff;
 	*data = __data;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
@@ -220,20 +225,23 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
 static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
 {
 	union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
-	int cc = -ENXIO;
+	int cc, exception;
 	u64 __data;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"       .insn   rre,0xb9d60000,%[data],%[ioaddr_len]\n"
-		"0:     ipm     %[cc]\n"
-		"       srl     %[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [data] "=d" (__data),
-		  [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+		: CC_OUT(cc, cc), [data] "=d" (__data),
+		  [ioaddr_len] "+d" (ioaddr_len.pair), [exc] "+d" (exception)
+		:
+		: CC_CLOBBER);
 	*status = ioaddr_len.odd >> 24 & 0xff;
 	*data = __data;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
@@ -256,19 +264,20 @@ EXPORT_SYMBOL_GPL(zpci_load);
 static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
 {
 	union register_pair req_off = {.even = req, .odd = offset};
-	int cc = -ENXIO;
+	int cc, exception;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"	.insn	rre,0xb9d00000,%[data],%[req_off]\n"
-		"0:	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [req_off] "+&d" (req_off.pair)
+		: CC_OUT(cc, cc), [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
 		: [data] "d" (data)
-		: "cc");
+		: CC_CLOBBER);
 	*status = req_off.even >> 24 & 0xff;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 int __zpci_store(u64 data, u64 req, u64 offset)
@@ -309,19 +318,20 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
 static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
 {
 	union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
-	int cc = -ENXIO;
+	int cc, exception;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"       .insn   rre,0xb9d40000,%[data],%[ioaddr_len]\n"
-		"0:     ipm     %[cc]\n"
-		"       srl     %[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+		: CC_OUT(cc, cc), [ioaddr_len] "+d" (ioaddr_len.pair), [exc] "+d" (exception)
 		: [data] "d" (data)
-		: "cc", "memory");
+		: CC_CLOBBER_LIST("memory"));
 	*status = ioaddr_len.odd >> 24 & 0xff;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
@@ -343,19 +353,20 @@ EXPORT_SYMBOL_GPL(zpci_store);
 /* PCI Store Block */
 static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
 {
-	int cc = -ENXIO;
+	int cc, exception;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"	.insn	rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
-		"0:	ipm	%[cc]\n"
-		"	srl	%[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [req] "+d" (req)
+		: CC_OUT(cc, cc), [req] "+d" (req), [exc] "+d" (exception)
 		: [offset] "d" (offset), [data] "Q" (*data)
-		: "cc");
+		: CC_CLOBBER);
 	*status = req >> 24 & 0xff;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 int __zpci_store_block(const u64 *data, u64 req, u64 offset)
@@ -396,19 +407,20 @@ static inline int zpci_write_block_fh(volatile void __iomem *dst,
 
 static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
 {
-	int cc = -ENXIO;
+	int cc, exception;
 
-	asm volatile (
+	exception = 1;
+	asm_inline volatile (
 		"       .insn   rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
-		"0:     ipm     %[cc]\n"
-		"       srl     %[cc],28\n"
+		"0:	lhi	%[exc],0\n"
 		"1:\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 1b)
-		: [cc] "+d" (cc), [len] "+d" (len)
+		: CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
 		: [ioaddr] "d" (ioaddr), [data] "Q" (*data)
-		: "cc");
+		: CC_CLOBBER);
 	*status = len >> 24 & 0xff;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 int zpci_write_block(volatile void __iomem *dst,
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
index ead062bf2b41..191e56a623f6 100644
--- a/arch/s390/pci/pci_iov.c
+++ b/arch/s390/pci/pci_iov.c
@@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
 	return 0;
 }
 
-int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+/**
+ * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
+ * @zbus:	The bus that the PCI function is on, or would be added on
+ * @zdev:	The PCI function
+ *
+ * Finds the parent PF, if it exists and is configured, of the given PCI function
+ * and increments its refcount. The PF is searched for on the provided bus so the
+ * caller has to ensure that this is the correct bus to search. This function may
+ * be used before adding the PCI function to a zbus.
+ *
+ * Return: Pointer to the struct pci_dev of the parent PF or NULL if it is not
+ * found. If the function is not a VF or has no RequesterID information,
+ * NULL is returned as well.
+ */
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
 {
-	int i, cand_devfn;
-	struct zpci_dev *zdev;
+	int i, vfid, devfn, cand_devfn;
 	struct pci_dev *pdev;
-	int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
-	int rc = 0;
 
 	if (!zbus->multifunction)
-		return 0;
-
-	/* If the parent PF for the given VF is also configured in the
+		return NULL;
+	/* Non-VFs and VFs without RID available don't have a parent */
+	if (!zdev->vfn || !zdev->rid_available)
+		return NULL;
+	/* Linux vfid starts at 0, vfn at 1 */
+	vfid = zdev->vfn - 1;
+	devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+	/*
+	 * If the parent PF for the given VF is also configured in the
 	 * instance, it must be on the same zbus.
 	 * We can then identify the parent PF by checking what
 	 * devfn the VF would have if it belonged to that PF using the PF's
@@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
 			if (!pdev)
 				continue;
 			cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
-			if (cand_devfn == virtfn->devfn) {
-				rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
-				/* balance pci_get_slot() */
-				pci_dev_put(pdev);
-				break;
-			}
+			if (cand_devfn == devfn)
+				return pdev;
 			/* balance pci_get_slot() */
 			pci_dev_put(pdev);
 		}
 	}
+	return NULL;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+	struct zpci_dev *zdev = to_zpci(virtfn);
+	struct pci_dev *pdev_pf;
+	int rc = 0;
+
+	pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
+	if (pdev_pf) {
+		/* Linux' vfids start at 0 while zdev->vfn starts at 1 */
+		rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
+		pci_dev_put(pdev_pf);
+	}
 	return rc;
 }
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
index b2c828003bad..d2c2793eb0f3 100644
--- a/arch/s390/pci/pci_iov.h
+++ b/arch/s390/pci/pci_iov.h
@@ -10,6 +10,8 @@
 #ifndef __S390_PCI_IOV_H
 #define __S390_PCI_IOV_H
 
+#include <linux/pci.h>
+
 #ifdef CONFIG_PCI_IOV
 void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
 
@@ -17,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);
 
 int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
 
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);
+
 #else /* CONFIG_PCI_IOV */
 static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
 
@@ -26,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
 {
 	return 0;
 }
+
+static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+	return NULL;
+}
 #endif /* CONFIG_PCI_IOV */
 #endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 500cd2dbdf53..84482a921332 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -11,16 +11,10 @@
 
 #include <asm/isc.h>
 #include <asm/airq.h>
+#include <asm/tpi.h>
 
 static enum {FLOATING, DIRECTED} irq_delivery;
 
-#define	SIC_IRQ_MODE_ALL		0
-#define	SIC_IRQ_MODE_SINGLE		1
-#define	SIC_IRQ_MODE_DIRECT		4
-#define	SIC_IRQ_MODE_D_ALL		16
-#define	SIC_IRQ_MODE_D_SINGLE		17
-#define	SIC_IRQ_MODE_SET_CPU		18
-
 /*
  * summary bit vector
  * FLOATING - summary bit per function
@@ -49,6 +43,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
 	fib.fmt0.aibvo = 0;	/* each zdev has its own interrupt vector */
 	fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
 	fib.fmt0.aisbo = zdev->aisb & 63;
+	fib.gd = zdev->gisa;
 
 	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
 }
@@ -60,6 +55,8 @@ static int zpci_clear_airq(struct zpci_dev *zdev)
 	struct zpci_fib fib = {0};
 	u8 cc, status;
 
+	fib.gd = zdev->gisa;
+
 	cc = zpci_mod_fc(req, &fib, &status);
 	if (cc == 3 || (cc == 1 && status == 24))
 		/* Function already gone or IRQs already deregistered. */
@@ -78,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
 	fib.fmt = 1;
 	fib.fmt1.noi = zdev->msi_nr_irqs;
 	fib.fmt1.dibvo = zdev->msi_first_bit;
+	fib.gd = zdev->gisa;
 
 	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
 }
@@ -90,6 +88,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
 	u8 cc, status;
 
 	fib.fmt = 1;
+	fib.gd = zdev->gisa;
 	cc = zpci_mod_fc(req, &fib, &status);
 	if (cc == 3 || (cc == 1 && status == 24))
 		/* Function already gone or IRQs already deregistered. */
@@ -133,7 +132,7 @@ static int zpci_clear_irq(struct zpci_dev *zdev)
 static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
 				 bool force)
 {
-	struct msi_desc *entry = irq_get_msi_desc(data->irq);
+	struct msi_desc *entry = irq_data_get_msi_desc(data);
 	struct msi_msg msg = entry->msg;
 	int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
 
@@ -153,6 +152,7 @@ static struct irq_chip zpci_irq_chip = {
 static void zpci_handle_cpu_local_irq(bool rescan)
 {
 	struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+	union zpci_sic_iib iib = {{0}};
 	unsigned long bit;
 	int irqs_on = 0;
 
@@ -163,8 +163,8 @@ static void zpci_handle_cpu_local_irq(bool rescan)
 			if (!rescan || irqs_on++)
 				/* End of second scan with interrupts on. */
 				break;
-			/* First scan complete, reenable interrupts. */
-			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+			/* First scan complete, re-enable interrupts. */
+			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
 				break;
 			bit = 0;
 			continue;
@@ -192,6 +192,7 @@ static void zpci_handle_remote_irq(void *data)
 static void zpci_handle_fallback_irq(void)
 {
 	struct cpu_irq_data *cpu_data;
+	union zpci_sic_iib iib = {{0}};
 	unsigned long cpu;
 	int irqs_on = 0;
 
@@ -201,8 +202,8 @@ static void zpci_handle_fallback_irq(void)
 			if (irqs_on++)
 				/* End of second scan with interrupts on. */
 				break;
-			/* First scan complete, reenable interrupts. */
-			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+			/* First scan complete, re-enable interrupts. */
+			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
 				break;
 			cpu = 0;
 			continue;
@@ -216,8 +217,11 @@ static void zpci_handle_fallback_irq(void)
 	}
 }
 
-static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_directed_irq_handler(struct airq_struct *airq,
+				      struct tpi_info *tpi_info)
 {
+	bool floating = !tpi_info->directed_irq;
+
 	if (floating) {
 		inc_irq_stat(IRQIO_PCF);
 		zpci_handle_fallback_irq();
@@ -227,8 +231,10 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
 	}
 }
 
-static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_floating_irq_handler(struct airq_struct *airq,
+				      struct tpi_info *tpi_info)
 {
+	union zpci_sic_iib iib = {{0}};
 	unsigned long si, ai;
 	struct airq_iv *aibv;
 	int irqs_on = 0;
@@ -241,8 +247,8 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
 			if (irqs_on++)
 				/* End of second scan with interrupts on. */
 				break;
-			/* First scan complete, reenable interrupts. */
-			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+			/* First scan complete, re-enable interrupts. */
+			if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
 				break;
 			si = 0;
 			continue;
@@ -262,61 +268,87 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
 	}
 }
 
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
+			unsigned long *bit)
 {
-	struct zpci_dev *zdev = to_zpci(pdev);
-	unsigned int hwirq, msi_vecs, cpu;
-	unsigned long bit;
-	struct msi_desc *msi;
-	struct msi_msg msg;
-	int cpu_addr;
-	int rc, irq;
-
-	zdev->aisb = -1UL;
-	zdev->msi_first_bit = -1U;
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
 	if (irq_delivery == DIRECTED) {
 		/* Allocate cpu vector bits */
-		bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
-		if (bit == -1UL)
+		*bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+		if (*bit == -1UL)
 			return -EIO;
 	} else {
 		/* Allocate adapter summary indicator bit */
-		bit = airq_iv_alloc_bit(zpci_sbv);
-		if (bit == -1UL)
+		*bit = airq_iv_alloc_bit(zpci_sbv);
+		if (*bit == -1UL)
 			return -EIO;
-		zdev->aisb = bit;
+		zdev->aisb = *bit;
 
 		/* Create adapter interrupt vector */
-		zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+		zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
 		if (!zdev->aibv)
 			return -ENOMEM;
 
 		/* Wire up shortcut pointer */
-		zpci_ibv[bit] = zdev->aibv;
+		zpci_ibv[*bit] = zdev->aibv;
 		/* Each function has its own interrupt vector */
-		bit = 0;
+		*bit = 0;
+	}
+	return 0;
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
+	struct zpci_dev *zdev = to_zpci(pdev);
+	struct msi_desc *msi;
+	struct msi_msg msg;
+	unsigned long bit;
+	int cpu_addr;
+	int rc, irq;
+
+	zdev->aisb = -1UL;
+	zdev->msi_first_bit = -1U;
+
+	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+	if (msi_vecs < nvec) {
+		pr_info("%s requested %d irqs, allocate system limit of %d",
+			pci_name(pdev), nvec, zdev->max_msi);
 	}
 
-	/* Request MSI interrupts */
+	rc = __alloc_airq(zdev, msi_vecs, &bit);
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * Request MSI interrupts:
+	 * When using MSI, nvec_used interrupt sources and their irq
+	 * descriptors are controlled through one msi descriptor.
+	 * Thus the outer loop over msi descriptors shall run only once,
+	 * while two inner loops iterate over the interrupt vectors.
+	 * When using MSI-X, each interrupt vector/irq descriptor
+	 * is bound to exactly one msi descriptor (nvec_used is one).
+	 * So the inner loops are executed once, while the outer iterates
+	 * over the MSI-X descriptors.
+	 */
 	hwirq = bit;
 	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
-		rc = -EIO;
 		if (hwirq - bit >= msi_vecs)
 			break;
-		irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
-				(irq_delivery == DIRECTED) ?
-				msi->affinity : NULL);
+		irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
+		irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
+					(irq_delivery == DIRECTED) ?
+					msi->affinity : NULL);
 		if (irq < 0)
 			return -ENOMEM;
-		rc = irq_set_msi_desc(irq, msi);
-		if (rc)
-			return rc;
-		irq_set_chip_and_handler(irq, &zpci_irq_chip,
-					 handle_percpu_irq);
+
+		for (i = 0; i < irqs_per_msi; i++) {
+			rc = irq_set_msi_desc_off(irq, i, msi);
+			if (rc)
+				return rc;
+			irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
+						 handle_percpu_irq);
+		}
+
 		msg.data = hwirq - bit;
 		if (irq_delivery == DIRECTED) {
 			if (msi->affinity)
@@ -329,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			msg.address_lo |= (cpu_addr << 8);
 
 			for_each_possible_cpu(cpu) {
-				airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+				for (i = 0; i < irqs_per_msi; i++)
+					airq_iv_set_data(zpci_ibv[cpu],
+							 hwirq + i, irq + i);
 			}
 		} else {
 			msg.address_lo = zdev->msi_addr & 0xffffffff;
-			airq_iv_set_data(zdev->aibv, hwirq, irq);
+			for (i = 0; i < irqs_per_msi; i++)
+				airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
 		}
 		msg.address_hi = zdev->msi_addr >> 32;
 		pci_write_msi_msg(irq, &msg);
-		hwirq++;
+		hwirq += irqs_per_msi;
 	}
 
 	zdev->msi_first_bit = bit;
-	zdev->msi_nr_irqs = msi_vecs;
+	zdev->msi_nr_irqs = hwirq - bit;
 
 	rc = zpci_set_irq(zdev);
 	if (rc)
 		return rc;
 
-	return (msi_vecs == nvec) ? 0 : msi_vecs;
+	return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
 }
 
 void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
 	struct zpci_dev *zdev = to_zpci(pdev);
 	struct msi_desc *msi;
+	unsigned int i;
 	int rc;
 
 	/* Disable interrupts */
@@ -363,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
 
 	/* Release MSI interrupts */
 	msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
-		irq_set_msi_desc(msi->irq, NULL);
-		irq_free_desc(msi->irq);
+		for (i = 0; i < msi->nvec_used; i++) {
+			irq_set_msi_desc(msi->irq + i, NULL);
+			irq_free_desc(msi->irq + i);
+		}
 		msi->msg.address_lo = 0;
 		msi->msg.address_hi = 0;
 		msi->msg.data = 0;
@@ -402,11 +440,12 @@ static struct airq_struct zpci_airq = {
 static void __init cpu_enable_directed_irq(void *unused)
 {
 	union zpci_sic_iib iib = {{0}};
+	union zpci_sic_iib ziib = {{0}};
 
-	iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+	iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
 
-	__zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
-	zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
 }
 
 static int __init zpci_directed_irq_init(void)
@@ -414,14 +453,14 @@ static int __init zpci_directed_irq_init(void)
 	union zpci_sic_iib iib = {{0}};
 	unsigned int cpu;
 
-	zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+	zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
 	if (!zpci_sbv)
 		return -ENOMEM;
 
 	iib.diib.isc = PCI_ISC;
 	iib.diib.nr_cpus = num_possible_cpus();
 	iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
-	__zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
 
 	zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
 			   GFP_KERNEL);
@@ -436,7 +475,7 @@ static int __init zpci_directed_irq_init(void)
 		zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
 					       AIRQ_IV_DATA |
 					       AIRQ_IV_CACHELINE |
-					       (!cpu ? AIRQ_IV_ALLOC : 0));
+					       (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
 		if (!zpci_ibv[cpu])
 			return -ENOMEM;
 	}
@@ -453,7 +492,7 @@ static int __init zpci_floating_irq_init(void)
 	if (!zpci_ibv)
 		return -ENOMEM;
 
-	zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+	zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
 	if (!zpci_sbv)
 		goto out_free;
 
@@ -466,6 +505,7 @@ out_free:
 
 int __init zpci_irq_init(void)
 {
+	union zpci_sic_iib iib = {{0}};
 	int rc;
 
 	irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
@@ -497,7 +537,7 @@ int __init zpci_irq_init(void)
 	 * Enable floating IRQs (with suppression after one IRQ). When using
 	 * directed IRQs this enables the fallback path.
 	 */
-	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);
 
 	return 0;
 out_airq:
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
new file mode 100644
index 000000000000..ff34baf50a3e
--- /dev/null
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022.  All rights reserved.
+ *	Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 080c88620723..51e7a28af899 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -14,6 +14,7 @@
 #include <asm/asm-extable.h>
 #include <asm/pci_io.h>
 #include <asm/pci_debug.h>
+#include <asm/asm.h>
 
 static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset)
 {
@@ -30,20 +31,24 @@ static inline int __pcistb_mio_inuser(
 		void __iomem *ioaddr, const void __user *src,
 		u64 len, u8 *status)
 {
-	int cc = -ENXIO;
-
-	asm volatile (
-		"       sacf 256\n"
-		"0:     .insn   rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
-		"1:     ipm     %[cc]\n"
-		"       srl     %[cc],28\n"
-		"2:     sacf 768\n"
+	int cc, exception;
+	bool sacf_flag;
+
+	exception = 1;
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile (
+		"	sacf	256\n"
+		"0:	.insn	rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
+		"1:	lhi	%[exc],0\n"
+		"2:	sacf	768\n"
+		CC_IPM(cc)
 		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
-		: [cc] "+d" (cc), [len] "+d" (len)
+		: CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
 		: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
-		: "cc", "memory");
+		: CC_CLOBBER_LIST("memory"));
+	disable_sacf_uaccess(sacf_flag);
 	*status = len >> 24 & 0xff;
-	return cc;
+	return exception ? -ENXIO : CC_TRANSFORM(cc);
 }
 
 static inline int __pcistg_mio_inuser(
@@ -51,7 +56,8 @@ static inline int __pcistg_mio_inuser(
 		u64 ulen, u8 *status)
 {
 	union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
-	int cc = -ENXIO;
+	int cc, exception;
+	bool sacf_flag;
 	u64 val = 0;
 	u64 cnt = ulen;
 	u8 tmp;
@@ -61,25 +67,29 @@ static inline int __pcistg_mio_inuser(
 	 * a register, then store it to PCI at @ioaddr while in secondary
 	 * address space. pcistg then uses the user mappings.
 	 */
-	asm volatile (
-		"       sacf    256\n"
-		"0:     llgc    %[tmp],0(%[src])\n"
-		"       sllg    %[val],%[val],8\n"
-		"       aghi    %[src],1\n"
-		"       ogr     %[val],%[tmp]\n"
-		"       brctg   %[cnt],0b\n"
-		"1:     .insn   rre,0xb9d40000,%[val],%[ioaddr_len]\n"
-		"2:     ipm     %[cc]\n"
-		"       srl     %[cc],28\n"
-		"3:     sacf    768\n"
-		EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+	exception = 1;
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile (
+		"	sacf	256\n"
+		"0:	llgc	%[tmp],0(%[src])\n"
+		"4:	sllg	%[val],%[val],8\n"
+		"	aghi	%[src],1\n"
+		"	ogr	%[val],%[tmp]\n"
+		"	brctg	%[cnt],0b\n"
+		"1:	.insn	rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+		"2:	lhi	%[exc],0\n"
+		"3:	sacf	768\n"
+		CC_IPM(cc)
+		EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+		: [src] "+a" (src), [cnt] "+d" (cnt),
+		  [val] "+d" (val), [tmp] "=d" (tmp), [exc] "+d" (exception),
+		  CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
 		:
-		[src] "+a" (src), [cnt] "+d" (cnt),
-		[val] "+d" (val), [tmp] "=d" (tmp),
-		[cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
-		:: "cc", "memory");
+		: CC_CLOBBER_LIST("memory"));
+	disable_sacf_uaccess(sacf_flag);
 	*status = ioaddr_len.odd >> 24 & 0xff;
 
+	cc = exception ? -ENXIO : CC_TRANSFORM(cc);
 	/* did we read everything from user memory? */
 	if (!cc && cnt != 0)
 		cc = -EFAULT;
@@ -97,9 +107,9 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
 		return -EINVAL;
 
 	while (n > 0) {
-		size = zpci_get_max_write_size((u64 __force) dst,
-					       (u64 __force) src, n,
-					       ZPCI_MAX_WRITE_SIZE);
+		size = zpci_get_max_io_size((u64 __force) dst,
+					    (u64 __force) src, n,
+					    ZPCI_MAX_WRITE_SIZE);
 		if (size > 8) /* main path */
 			rc = __pcistb_mio_inuser(dst, src, size, &status);
 		else
@@ -118,12 +128,11 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
 SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
 		const void __user *, user_buffer, size_t, length)
 {
+	struct follow_pfnmap_args args = { };
 	u8 local_buf[64];
 	void __iomem *io_addr;
 	void *buf;
 	struct vm_area_struct *vma;
-	pte_t *ptep;
-	spinlock_t *ptl;
 	long ret;
 
 	if (!zpci_is_enabled())
@@ -169,11 +178,17 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
 	if (!(vma->vm_flags & VM_WRITE))
 		goto out_unlock_mmap;
 
-	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
-	if (ret)
-		goto out_unlock_mmap;
+	args.address = mmio_addr;
+	args.vma = vma;
+	ret = follow_pfnmap_start(&args);
+	if (ret) {
+		fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+		ret = follow_pfnmap_start(&args);
+		if (ret)
+			goto out_unlock_mmap;
+	}
 
-	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+	io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
 			(mmio_addr & ~PAGE_MASK));
 
 	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
@@ -181,7 +196,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
 
 	ret = zpci_memcpy_toio(io_addr, buf, length);
 out_unlock_pt:
-	pte_unmap_unlock(ptep, ptl);
+	follow_pfnmap_end(&args);
 out_unlock_mmap:
 	mmap_read_unlock(current->mm);
 out_free:
@@ -195,9 +210,10 @@ static inline int __pcilg_mio_inuser(
 		u64 ulen, u8 *status)
 {
 	union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+	bool sacf_flag;
 	u64 cnt = ulen;
 	int shift = ulen * 8;
-	int cc = -ENXIO;
+	int cc, exception;
 	u64 val, tmp;
 
 	/*
@@ -205,27 +221,34 @@ static inline int __pcilg_mio_inuser(
 	 * user space) into a register using pcilg then store these bytes at
 	 * user address @dst
 	 */
-	asm volatile (
-		"       sacf    256\n"
-		"0:     .insn   rre,0xb9d60000,%[val],%[ioaddr_len]\n"
-		"1:     ipm     %[cc]\n"
-		"       srl     %[cc],28\n"
-		"       ltr     %[cc],%[cc]\n"
-		"       jne     4f\n"
-		"2:     ahi     %[shift],-8\n"
-		"       srlg    %[tmp],%[val],0(%[shift])\n"
-		"3:     stc     %[tmp],0(%[dst])\n"
-		"       aghi    %[dst],1\n"
-		"       brctg   %[cnt],2b\n"
-		"4:     sacf    768\n"
-		EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
+	exception = 1;
+	sacf_flag = enable_sacf_uaccess();
+	asm_inline volatile (
+		"	sacf	256\n"
+		"0:	.insn	rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+		"1:	lhi	%[exc],0\n"
+		"	jne	4f\n"
+		"2:	ahi	%[shift],-8\n"
+		"	srlg	%[tmp],%[val],0(%[shift])\n"
+		"3:	stc	%[tmp],0(%[dst])\n"
+		"5:	aghi	%[dst],1\n"
+		"	brctg	%[cnt],2b\n"
+		/*
+		 * Use xr to clear exc and set condition code to zero
+		 * to ensure flag output is correct for this branch.
+		 */
+		"	xr	%[exc],%[exc]\n"
+		"4:	sacf	768\n"
+		CC_IPM(cc)
+		EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
+		: [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception),
+		  CC_OUT(cc, cc), [val] "=d" (val),
+		  [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
+		  [shift] "+a" (shift)
 		:
-		[ioaddr_len] "+&d" (ioaddr_len.pair),
-		[cc] "+d" (cc), [val] "=d" (val),
-		[dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
-		[shift] "+d" (shift)
-		:: "cc", "memory");
-
+		: CC_CLOBBER_LIST("memory"));
+	disable_sacf_uaccess(sacf_flag);
+	cc = exception ? -ENXIO : CC_TRANSFORM(cc);
 	/* did we write everything to the user space buffer? */
 	if (!cc && cnt != 0)
 		cc = -EFAULT;
@@ -242,9 +265,9 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
 	u8 status;
 
 	while (n > 0) {
-		size = zpci_get_max_write_size((u64 __force) src,
-					       (u64 __force) dst, n,
-					       ZPCI_MAX_READ_SIZE);
+		size = zpci_get_max_io_size((u64 __force) src,
+					    (u64 __force) dst, n,
+					    ZPCI_MAX_READ_SIZE);
 		rc = __pcilg_mio_inuser(dst, src, size, &status);
 		if (rc)
 			break;
@@ -260,12 +283,11 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
 SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 		void __user *, user_buffer, size_t, length)
 {
+	struct follow_pfnmap_args args = { };
 	u8 local_buf[64];
 	void __iomem *io_addr;
 	void *buf;
 	struct vm_area_struct *vma;
-	pte_t *ptep;
-	spinlock_t *ptl;
 	long ret;
 
 	if (!zpci_is_enabled())
@@ -305,14 +327,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 		goto out_unlock_mmap;
 	ret = -EACCES;
-	if (!(vma->vm_flags & VM_WRITE))
+	if (!(vma->vm_flags & VM_READ))
 		goto out_unlock_mmap;
 
-	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
-	if (ret)
-		goto out_unlock_mmap;
+	args.vma = vma;
+	args.address = mmio_addr;
+	ret = follow_pfnmap_start(&args);
+	if (ret) {
+		fixup_user_fault(current->mm, mmio_addr, 0, NULL);
+		ret = follow_pfnmap_start(&args);
+		if (ret)
+			goto out_unlock_mmap;
+	}
 
-	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+	io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
 			(mmio_addr & ~PAGE_MASK));
 
 	if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
@@ -322,7 +350,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 	ret = zpci_memcpy_fromio(buf, io_addr, length);
 
 out_unlock_pt:
-	pte_unmap_unlock(ptep, ptl);
+	follow_pfnmap_end(&args);
 out_unlock_mmap:
 	mmap_read_unlock(current->mm);
 
diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c
new file mode 100644
index 000000000000..1b494e5ecc4d
--- /dev/null
+++ b/arch/s390/pci/pci_report.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/sprintf.h>
+#include <linux/pci.h>
+
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/pci_debug.h>
+
+#include "pci_report.h"
+
+#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
+
+struct zpci_report_error_data {
+	u64 timestamp;		/* filled from ktime_get_clocktai_seconds() */
+	u64 err_log_id;		/* set to ZPCI_ERR_LOG_ID_KERNEL_REPORT */
+	char log_data[];	/* report text built by zpci_report_status() */
+} __packed;
+
+#define ZPCI_REPORT_SIZE	(PAGE_SIZE - sizeof(struct err_notify_sccb))
+#define ZPCI_REPORT_DATA_SIZE	(ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
+
+struct zpci_report_error {
+	struct zpci_report_error_header header;	/* handed to sclp_pci_report() */
+	struct zpci_report_error_data data;	/* payload counted by header.length */
+} __packed;
+
+static const char *zpci_state_str(pci_channel_state_t state)
+{
+	/* Translate the channel state into the word used in the report text */
+	if (state == pci_channel_io_normal)
+		return "normal";
+	if (state == pci_channel_io_frozen)
+		return "frozen";
+	if (state == pci_channel_io_perm_failure)
+		return "permanent-failure";
+
+	/* Every other value is reported as invalid */
+	return "invalid";
+}
+
+/*
+ * Format the prefix of one log entry: split entry->clock into seconds and
+ * microseconds, then append level, exception marker and CPU number.
+ */
+static int debug_log_header_fn(debug_info_t *id, struct debug_view *view,
+			       int area, debug_entry_t *entry, char *out_buf,
+			       size_t out_buf_size)
+{
+	const char *marker = entry->exception ? "*" : "-";
+	unsigned int lvl = entry->level;
+	unsigned long secs = entry->clock;
+	unsigned long micros;
+
+	/* do_div() leaves the quotient in secs and returns the remainder */
+	micros = do_div(secs, USEC_PER_SEC);
+
+	return scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u  ",
+			 secs, micros, lvl, marker, entry->cpu);
+}
+
+/*
+ * Column legend naming the fields written by debug_log_header_fn().
+ */
+static int debug_prolog_header(debug_info_t *id, struct debug_view *view,
+			       char *out_buf, size_t out_buf_size)
+{
+	return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu  msg\n");
+}
+
+static struct debug_view debug_log_view = {
+	"pci_msg_log",			/* presumably the view name - confirm in struct debug_view */
+	&debug_prolog_header,		/* emits the column legend */
+	&debug_log_header_fn,		/* emits the per-entry prefix */
+	&debug_sprintf_format_fn,	/* entry formatter, defined outside this file */
+	NULL,				/* NOTE(review): remaining slots unused here - confirm meaning */
+	NULL
+};
+
+/**
+ * zpci_report_status - Report the status of operations on a PCI device
+ * @zdev:	The PCI device for which to report status
+ * @operation:	A string representing the operation reported
+ * @status:	A string representing the status of the operation
+ *
+ * This function creates a human readable report about an operation such as
+ * PCI device recovery and forwards this to the platform using the SCLP Write
+ * Event Data mechanism. Besides the operation and status strings the report
+ * also contains additional information about the device deemed useful for
+ * debug such as the currently bound device driver, if any, and error state.
+ * As much of the pci_debug_msg_id log as fits is appended in text form.
+ *
+ * Return: 0 on success, an error code < 0 otherwise.
+ */
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
+{
+	struct zpci_report_error *report;
+	struct pci_driver *driver = NULL;
+	struct pci_dev *pdev = NULL;
+	char *buf, *end;
+	int ret;
+
+	if (!zdev || !zdev->zbus)
+		return -ENODEV;
+
+	/* Protected virtualization hosts get nothing from us */
+	if (prot_virt_guest)
+		return -ENODATA;
+
+	report = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!report)
+		return -ENOMEM;
+	if (zdev->zbus->bus)
+		pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+	if (pdev)
+		driver = to_pci_driver(pdev->dev.driver);
+
+	buf = report->data.log_data;
+	end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
+	buf += scnprintf(buf, end - buf, "report: %s\n", operation);
+	buf += scnprintf(buf, end - buf, "status: %s\n", status);
+	buf += scnprintf(buf, end - buf, "state: %s\n",
+			 (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
+	buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
+	ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true);
+	if (ret < 0)
+		pr_err("Reading PCI debug messages failed with code %d\n", ret);
+	else
+		buf += ret;
+
+	report->header.version = 1;
+	report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
+	report->header.length = buf - (char *)&report->data;
+	report->data.timestamp = ktime_get_clocktai_seconds();
+	report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
+
+	ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
+	if (ret)
+		pr_err("Reporting PCI status failed with code %d\n", ret);
+	else
+		pr_info("Reported PCI device status\n");
+
+	/* Drop the reference taken by pci_get_slot(); a NULL pdev is fine */
+	pci_dev_put(pdev);
+	free_page((unsigned long)report);
+	return ret;
+}
diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h
new file mode 100644
index 000000000000..e08003d51a97
--- /dev/null
+++ b/arch/s390/pci/pci_report.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+#ifndef __S390_PCI_REPORT_H
+#define __S390_PCI_REPORT_H
+
+struct zpci_dev;
+
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
+
+#endif /* __S390_PCI_REPORT_H */
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index cae280e5c047..0ecad08e1b1e 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -23,7 +23,7 @@ static ssize_t name##_show(struct device *dev,				\
 {									\
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));		\
 									\
-	return sprintf(buf, fmt, zdev->member);				\
+	return sysfs_emit(buf, fmt, zdev->member);				\
 }									\
 static DEVICE_ATTR_RO(name)
 
@@ -34,6 +34,7 @@ zpci_attr(pfgid, "0x%02x\n", pfgid);
 zpci_attr(vfn, "0x%04x\n", vfn);
 zpci_attr(pft, "0x%02x\n", pft);
 zpci_attr(port, "%d\n", port);
+zpci_attr(fidparm, "0x%02x\n", fidparm);
 zpci_attr(uid, "0x%x\n", uid);
 zpci_attr(segment0, "0x%02x\n", pfip[0]);
 zpci_attr(segment1, "0x%02x\n", pfip[1]);
@@ -45,10 +46,34 @@ static ssize_t mio_enabled_show(struct device *dev,
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
-	return sprintf(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
+	return sysfs_emit(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
 }
 static DEVICE_ATTR_RO(mio_enabled);
 
+/*
+ * Remove pdev from its bus, disable zdev if it is still enabled, reenable it.
+ */
+static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
+{
+	int rc = 0;
+
+	/* Take the PCI device off the bus before touching the function */
+	pci_stop_and_remove_bus_device(pdev);
+
+	/*
+	 * Due to a z/VM vs LPAR inconsistency in the error state, the
+	 * FH may indicate an enabled device while the disable request
+	 * reports it as already disabled (-EINVAL). That is not an
+	 * error here.
+	 */
+	if (zdev_enabled(zdev))
+		rc = zpci_disable_device(zdev);
+	if (rc && rc != -EINVAL)
+		return rc;
+
+	return zpci_reenable_device(zdev);
+}
+
 static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
@@ -69,6 +94,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 	 */
 	kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
 	WARN_ON_ONCE(!kn);
+
+	/* Device needs to be configured and state must not change */
+	mutex_lock(&zdev->state_lock);
+	if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+		goto out;
+
 	/* device_remove_file() serializes concurrent calls ignoring all but
 	 * the first
 	 */
@@ -81,39 +112,13 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 	 */
 	pci_lock_rescan_remove();
 	if (pci_dev_is_added(pdev)) {
-		pci_stop_and_remove_bus_device(pdev);
-		if (zdev->dma_table) {
-			ret = zpci_dma_exit_device(zdev);
-			if (ret)
-				goto out;
-		}
-
-		if (zdev_enabled(zdev)) {
-			ret = zpci_disable_device(zdev);
-			/*
-			 * Due to a z/VM vs LPAR inconsistency in the error
-			 * state the FH may indicate an enabled device but
-			 * disable says the device is already disabled don't
-			 * treat it as an error here.
-			 */
-			if (ret == -EINVAL)
-				ret = 0;
-			if (ret)
-				goto out;
-		}
-
-		ret = zpci_enable_device(zdev);
-		if (ret)
-			goto out;
-		ret = zpci_dma_init_device(zdev);
-		if (ret) {
-			zpci_disable_device(zdev);
-			goto out;
-		}
-		pci_rescan_bus(zdev->zbus->bus);
+		ret = _do_recover(pdev, zdev);
 	}
-out:
+	pci_rescan_bus(zdev->zbus->bus);
 	pci_unlock_rescan_remove();
+
+out:
+	mutex_unlock(&zdev->state_lock);
 	if (kn)
 		sysfs_unbreak_active_protection(kn);
 	return ret ? ret : count;
@@ -121,7 +126,7 @@ out:
 static DEVICE_ATTR_WO(recover);
 
 static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *attr, char *buf,
+				const struct bin_attribute *attr, char *buf,
 				loff_t off, size_t count)
 {
 	struct device *dev = kobj_to_dev(kobj);
@@ -131,10 +136,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
 	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
 				       sizeof(zdev->util_str));
 }
-static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
 
 static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *attr, char *buf,
+				  const struct bin_attribute *attr, char *buf,
 				  loff_t off, size_t count)
 {
 	struct zpci_report_error_header *report = (void *) buf;
@@ -150,7 +155,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
 
 	return ret ? ret : count;
 }
-static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
 
 static ssize_t uid_is_unique_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
@@ -159,7 +164,6 @@ static ssize_t uid_is_unique_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(uid_is_unique);
 
-#ifndef CONFIG_DMI
 /* analogous to smbios index */
 static ssize_t index_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
@@ -185,13 +189,12 @@ static struct attribute *zpci_ident_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group zpci_ident_attr_group = {
+const struct attribute_group zpci_ident_attr_group = {
 	.attrs = zpci_ident_attrs,
 	.is_visible = zpci_index_is_visible,
 };
-#endif
 
-static struct bin_attribute *zpci_bin_attrs[] = {
+static const struct bin_attribute *const zpci_bin_attrs[] = {
 	&bin_attr_util_string,
 	&bin_attr_report_error,
 	NULL,
@@ -204,6 +207,7 @@ static struct attribute *zpci_dev_attrs[] = {
 	&dev_attr_pfgid.attr,
 	&dev_attr_pft.attr,
 	&dev_attr_port.attr,
+	&dev_attr_fidparm.attr,
 	&dev_attr_vfn.attr,
 	&dev_attr_uid.attr,
 	&dev_attr_recover.attr,
@@ -212,9 +216,9 @@ static struct attribute *zpci_dev_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group zpci_attr_group = {
+const struct attribute_group zpci_attr_group = {
 	.attrs = zpci_dev_attrs,
-	.bin_attrs = zpci_bin_attrs,
+	.bin_attrs_new = zpci_bin_attrs,
 };
 
 static struct attribute *pfip_attrs[] = {
@@ -224,16 +228,8 @@ static struct attribute *pfip_attrs[] = {
 	&dev_attr_segment3.attr,
 	NULL,
 };
-static struct attribute_group pfip_attr_group = {
+
+const struct attribute_group pfip_attr_group = {
 	.name = "pfip",
 	.attrs = pfip_attrs,
 };
-
-const struct attribute_group *zpci_attr_groups[] = {
-	&zpci_attr_group,
-	&pfip_attr_group,
-#ifndef CONFIG_DMI
-	&zpci_ident_attr_group,
-#endif
-	NULL,
-};
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index d237bc6841cb..bd39b36e7bd6 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
-OBJECT_FILES_NON_STANDARD := y
-
 purgatory-y := head.o purgatory.o string.o sha256.o mem.o
 
 targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
@@ -10,25 +8,22 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_sha256.o := -D__NO_FORTIFY
 
 $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
 	$(call if_changed_rule,as_o_S)
 
-KCOV_INSTRUMENT := n
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
+KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes
 KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
 KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
-KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
 KBUILD_CFLAGS += -fno-stack-protector
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += -D__DISABLE_EXPORTS
 KBUILD_CFLAGS += $(CLANG_FLAGS)
 KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS += -D__DISABLE_EXPORTS
 
 # Since we link purgatory with -r unresolved symbols are not checked, so we
 # also link a purgatory.chk binary without -r to check for unresolved symbols.
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 6f835124ee82..db3ab2402621 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -76,9 +76,9 @@
 	diag	%r0,%r1,0x308
 .endm
 
-.text
-.align PAGE_SIZE
-ENTRY(purgatory_start)
+	.text
+	.balign PAGE_SIZE
+SYM_CODE_START(purgatory_start)
 	/* The purgatory might be called after a diag308 so better set
 	 * architecture and addressing mode.
 	 */
@@ -100,7 +100,7 @@ ENTRY(purgatory_start)
 	 * checksum verification only (%r2 = 0 -> verification only).
 	 *
 	 * Check now and preserve over C function call by storing in
-	 * %r10 whith
+	 * %r10 with
 	 *	1 -> checksum verification only
 	 *	0 -> load new kernel
 	 */
@@ -156,7 +156,7 @@ ENTRY(purgatory_start)
 	agr	%r10,%r9
 
 	/* Buffer location (in crash memory) and size. As the purgatory is
-	 * behind the point of no return it can re-use the stack as buffer.
+	 * behind the point of no return it can reuse the stack as buffer.
 	 */
 	larl	%r11,purgatory_end
 	larl	%r12,stack
@@ -245,45 +245,21 @@ ENTRY(purgatory_start)
 
 	/* start crash kernel */
 	START_NEXT_KERNEL .base_dst 1
-
-
-load_psw_mask:
-	.long	0x00080000,0x80000000
-
-	.align	8
-disabled_wait_psw:
-	.quad	0x0002000180000000
-	.quad	0x0000000000000000 + .do_checksum_verification
-
-gprregs:
-	.rept	10
-	.quad	0
-	.endr
-
-/* Macro to define a global variable with name and size (in bytes) to be
- * shared with C code.
- *
- * Add the .size and .type attribute to satisfy checks on the Elf_Sym during
- * purgatory load.
- */
-.macro GLOBAL_VARIABLE name,size
-\name:
-	.global \name
-	.size	\name,\size
-	.type	\name,object
-	.skip	\size,0
-.endm
-
-GLOBAL_VARIABLE purgatory_sha256_digest,32
-GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE
-GLOBAL_VARIABLE kernel_entry,8
-GLOBAL_VARIABLE kernel_type,8
-GLOBAL_VARIABLE crash_start,8
-GLOBAL_VARIABLE crash_size,8
-
-	.align	PAGE_SIZE
-stack:
+SYM_CODE_END(purgatory_start)
+
+SYM_DATA_LOCAL(load_psw_mask,		.long 0x00080000,0x80000000)
+	.balign	8
+SYM_DATA_LOCAL(disabled_wait_psw,	.quad 0x0002000180000000,.do_checksum_verification)
+SYM_DATA_LOCAL(gprregs,			.fill 10,8,0)
+SYM_DATA(purgatory_sha256_digest,	.skip 32)
+SYM_DATA(purgatory_sha_regions,		.skip 16*__KEXEC_SHA_REGION_SIZE)
+SYM_DATA(kernel_entry,			.skip 8)
+SYM_DATA(kernel_type,			.skip 8)
+SYM_DATA(crash_start,			.skip 8)
+SYM_DATA(crash_size,			.skip 8)
+	.balign	PAGE_SIZE
+SYM_DATA_START_LOCAL(stack)
 	/* The buffer to move this code must be as big as the code. */
 	.skip	stack-purgatory_start
-	.align	PAGE_SIZE
-purgatory_end:
+	.balign	PAGE_SIZE
+SYM_DATA_END_LABEL(stack, SYM_L_LOCAL, purgatory_end)
diff --git a/arch/s390/purgatory/kexec-purgatory.S b/arch/s390/purgatory/kexec-purgatory.S
index 8293753100ae..25f512b1de12 100644
--- a/arch/s390/purgatory/kexec-purgatory.S
+++ b/arch/s390/purgatory/kexec-purgatory.S
@@ -1,14 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
 
 	.section .rodata, "a"
 
-	.align	8
-kexec_purgatory:
-	.globl	kexec_purgatory
+	.balign	8
+SYM_DATA_START(kexec_purgatory)
 	.incbin	"arch/s390/purgatory/purgatory.ro"
-.Lkexec_purgatroy_end:
+SYM_DATA_END_LABEL(kexec_purgatory, SYM_L_LOCAL, kexec_purgatory_end)
 
-	.align	8
-kexec_purgatory_size:
-	.globl	kexec_purgatory_size
-	.quad	.Lkexec_purgatroy_end - kexec_purgatory
+	.balign	8
+SYM_DATA(kexec_purgatory_size, .quad kexec_purgatory_end-kexec_purgatory)
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
index ea62f37b79ef..e6af51d9d183 100644
--- a/arch/s390/tools/.gitignore
+++ b/arch/s390/tools/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 gen_facilities
 gen_opcode_table
+relocs
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index f9dd47ff9ac4..f2862364fb42 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -25,3 +25,8 @@ $(kapi)/facility-defs.h: $(obj)/gen_facilities FORCE
 
 $(kapi)/dis-defs.h: $(obj)/gen_opcode_table FORCE
 	$(call filechk,dis-defs.h)
+
+hostprogs	+= relocs
+PHONY		+= relocs
+relocs: $(obj)/relocs
+	@:
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 530dd941d140..d5c68ade71ab 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -46,6 +46,7 @@ static struct facility_def facility_defs[] = {
 #endif
 #ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
 			53, /* load-and-zero-rightmost-byte, etc. */
+			129, /* vector */
 #endif
 #ifdef CONFIG_HAVE_MARCH_Z14_FEATURES
 			58, /* miscellaneous-instruction-extension 2 */
@@ -53,6 +54,9 @@ static struct facility_def facility_defs[] = {
 #ifdef CONFIG_HAVE_MARCH_Z15_FEATURES
 			61, /* miscellaneous-instruction-extension 3 */
 #endif
+#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES
+			84, /* miscellaneous-instruction-extension 4 */
+#endif
 			-1 /* END */
 		}
 	},
@@ -108,9 +112,12 @@ static struct facility_def facility_defs[] = {
 			15, /* AP Facilities Test */
 			156, /* etoken facility */
 			165, /* nnpa facility */
+			170, /* ineffective-nonconstrained-transaction facility */
 			193, /* bear enhancement facility */
 			194, /* rdp enhancement facility */
 			196, /* processor activity instrumentation facility */
+			197, /* processor activity instrumentation extension 1 */
+			201, /* concurrent-functions facility */
 			-1  /* END */
 		}
 	},
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index a1bc02b29c81..7d76c417f83f 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b)
 	return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name);
 }
 
+static void print_insn_name(const char *name)
+{
+	const char *p;
+
+	/* Print the name as a brace-enclosed list of character constants */
+	printf("{");
+	for (p = name; *p != '\0'; p++)
+		printf(" \'%c\',", *p);
+	printf(" }");
+}
+
 static void print_long_insn(struct gen_opcode *desc)
 {
 	struct insn *insn;
@@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc)
 		insn = &desc->insn[i];
 		if (insn->name_len < 6)
 			continue;
-		printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name);
+		printf("\t[LONG_INSN_%s] = ", insn->upper);
+		print_insn_name(insn->name);
+		printf(", \\\n");
 	}
 	printf("}\n\n");
 }
@@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr)
 	if (insn->type->byte != 0)
 		opcode += 2;
 	printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format);
-	if (insn->name_len < 6)
-		printf(".name = \"%s\" ", insn->name);
-	else
-		printf(".offset = LONG_INSN_%s ", insn->upper);
-	printf("}, \\\n");
+	if (insn->name_len < 6) {
+		printf(".name =  ");
+		print_insn_name(insn->name);
+	} else {
+		printf(".offset = LONG_INSN_%s", insn->upper);
+	}
+	printf(" }, \\\n");
 }
 
 static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 5f008e794898..def2659f6602 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -527,9 +527,9 @@ b938	sortl	RRE_RR
 b939	dfltcc	RRF_R0RR2
 b93a	kdsa	RRE_RR
 b93b	nnpa	RRE_00
-b93c	ppno	RRE_RR
-b93e	kimd	RRE_RR
-b93f	klmd	RRE_RR
+b93c	prno	RRE_RR
+b93e	kimd	RRF_U0RR
+b93f	klmd	RRF_U0RR
 b941	cfdtr	RRF_UURF
 b942	clgdtr	RRF_UURF
 b943	clfdtr	RRF_UURF
@@ -549,6 +549,10 @@ b964	nngrk	RRF_R0RR2
 b965	ocgrk	RRF_R0RR2
 b966	nogrk	RRF_R0RR2
 b967	nxgrk	RRF_R0RR2
+b968	clzg	RRE_RR
+b969	ctzg	RRE_RR
+b96c	bextg	RRF_R0RR2
+b96d	bdepg	RRF_R0RR2
 b972	crt	RRF_U0RR
 b973	clrt	RRF_U0RR
 b974	nnrk	RRF_R0RR2
@@ -796,6 +800,16 @@ e35b	sy	RXY_RRRD
 e35c	mfy	RXY_RRRD
 e35e	aly	RXY_RRRD
 e35f	sly	RXY_RRRD
+e360	lxab	RXY_RRRD
+e361	llxab	RXY_RRRD
+e362	lxah	RXY_RRRD
+e363	llxah	RXY_RRRD
+e364	lxaf	RXY_RRRD
+e365	llxaf	RXY_RRRD
+e366	lxag	RXY_RRRD
+e367	llxag	RXY_RRRD
+e368	lxaq	RXY_RRRD
+e369	llxaq	RXY_RRRD
 e370	sthy	RXY_RRRD
 e371	lay	RXY_RRRD
 e372	stcy	RXY_RRRD
@@ -880,6 +894,8 @@ e63c	vupkz	VSI_URDV
 e63d	vstrl	VSI_URDV
 e63f	vstrlr	VRS_RRDV
 e649	vlip	VRI_V0UU2
+e64a	vcvdq	VRI_VV0UU
+e64e	vcvbq	VRR_VV0U2
 e650	vcvb	VRR_RV0UU
 e651	vclzdp	VRR_VV0U2
 e652	vcvbg	VRR_RV0UU
@@ -893,7 +909,7 @@ e65b	vpsop	VRI_VVUUU2
 e65c	vupkzl	VRR_VV0U2
 e65d	vcfn	VRR_VV0UU2
 e65e	vclfnl	VRR_VV0UU2
-e65f	vtp	VRR_0V
+e65f	vtp	VRR_0V0U
 e670	vpkzr	VRI_VVV0UU2
 e671	vap	VRI_VVV0UU2
 e672	vsrpr	VRI_VVV0UU2
@@ -908,6 +924,7 @@ e67b	vrp	VRI_VVV0UU2
 e67c	vscshp	VRR_VVV
 e67d	vcsph	VRR_VVV0U0
 e67e	vsdp	VRI_VVV0UU2
+e67f	vtz	VRR_0VVU
 e700	vleb	VRX_VRRDU
 e701	vleh	VRX_VRRDU
 e702	vleg	VRX_VRRDU
@@ -948,6 +965,7 @@ e74d	vrep	VRI_VVUU
 e750	vpopct	VRR_VV0U
 e752	vctz	VRR_VV0U
 e753	vclz	VRR_VV0U
+e754	vgem	VRR_VV0U
 e756	vlr	VRX_VV
 e75c	vistr	VRR_VV0U0U
 e75f	vseg	VRR_VV0U
@@ -985,6 +1003,8 @@ e784	vpdi	VRR_VVV0U
 e785	vbperm	VRR_VVV
 e786	vsld	VRI_VVV0U
 e787	vsrd	VRI_VVV0U
+e788	veval	VRI_VVV0UV
+e789	vblend	VRR_VVVU0V
 e78a	vstrc	VRR_VVVUU0V
 e78b	vstrs	VRR_VVVUU0V
 e78c	vperm	VRR_VVV0V
@@ -1010,6 +1030,10 @@ e7ac	vmale	VRR_VVVU0V
 e7ad	vmalo	VRR_VVVU0V
 e7ae	vmae	VRR_VVVU0V
 e7af	vmao	VRR_VVVU0V
+e7b0	vdl	VRR_VVV0UU
+e7b1	vrl	VRR_VVV0UU
+e7b2	vd	VRR_VVV0UU
+e7b3	vr	VRR_VVV0UU
 e7b4	vgfm	VRR_VVV0U
 e7b8	vmsl	VRR_VVVUU0V
 e7b9	vaccc	VRR_VVVU0V
@@ -1017,12 +1041,12 @@ e7bb	vac	VRR_VVVU0V
 e7bc	vgfma	VRR_VVVU0V
 e7bd	vsbcbi	VRR_VVVU0V
 e7bf	vsbi	VRR_VVVU0V
-e7c0	vclgd	VRR_VV0UUU
-e7c1	vcdlg	VRR_VV0UUU
-e7c2	vcgd	VRR_VV0UUU
-e7c3	vcdg	VRR_VV0UUU
-e7c4	vlde	VRR_VV0UU2
-e7c5	vled	VRR_VV0UUU
+e7c0	vclfp	VRR_VV0UUU
+e7c1	vcfpl	VRR_VV0UUU
+e7c2	vcsfp	VRR_VV0UUU
+e7c3	vcfps	VRR_VV0UUU
+e7c4	vfll	VRR_VV0UU2
+e7c5	vflr	VRR_VV0UUU
 e7c7	vfi	VRR_VV0UUU
 e7ca	wfk	VRR_VV0UU2
 e7cb	wfc	VRR_VV0UU2
@@ -1094,9 +1118,9 @@ eb54	niy	SIY_URD
 eb55	cliy	SIY_URD
 eb56	oiy	SIY_URD
 eb57	xiy	SIY_URD
-eb60	lric	RSY_RDRU
-eb61	stric	RSY_RDRU
-eb62	mric	RSY_RDRU
+eb60	lric	RSY_RURD2
+eb61	stric	RSY_RURD2
+eb62	mric	RSY_RURD2
 eb6a	asi	SIY_IRD
 eb6e	alsi	SIY_IRD
 eb71	lpswey	SIY_RD
@@ -1104,7 +1128,7 @@ eb7a	agsi	SIY_IRD
 eb7e	algsi	SIY_IRD
 eb80	icmh	RSY_RURD
 eb81	icmy	RSY_RURD
-eb8a	sqbs	RSY_RDRU
+eb8a	sqbs	RSY_RURD2
 eb8e	mvclu	RSY_RRRD
 eb8f	clclu	RSY_RRRD
 eb90	stmy	RSY_RRRD
diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c
new file mode 100644
index 000000000000..30a732c808f3
--- /dev/null
+++ b/arch/s390/tools/relocs.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+
+#define ELF_BITS 64
+
+#define ELF_MACHINE		EM_S390
+#define ELF_MACHINE_NAME	"IBM S/390"
+#define SHT_REL_TYPE		SHT_RELA
+#define Elf_Rel			Elf64_Rela
+
+#define ELF_CLASS		ELFCLASS64
+#define ELF_ENDIAN		ELFDATA2MSB
+#define ELF_R_SYM(val)		ELF64_R_SYM(val)
+#define ELF_R_TYPE(val)		ELF64_R_TYPE(val)
+#define ELF_ST_TYPE(o)		ELF64_ST_TYPE(o)
+#define ELF_ST_BIND(o)		ELF64_ST_BIND(o)
+#define ELF_ST_VISIBILITY(o)	ELF64_ST_VISIBILITY(o)
+
+#define ElfW(type)		_ElfW(ELF_BITS, type)
+#define _ElfW(bits, type)	__ElfW(bits, type)
+#define __ElfW(bits, type)	Elf##bits##_##type
+
+#define Elf_Addr		ElfW(Addr)
+#define Elf_Ehdr		ElfW(Ehdr)
+#define Elf_Phdr		ElfW(Phdr)
+#define Elf_Shdr		ElfW(Shdr)
+#define Elf_Sym			ElfW(Sym)
+
+static Elf_Ehdr		ehdr;
+static unsigned long	shnum;
+static unsigned int	shstrndx;
+
+struct relocs {
+	uint32_t	*offset;	/* recorded offsets, grown by add_reloc() */
+	unsigned long	count;		/* entries in use */
+	unsigned long	size;		/* entries allocated */
+};
+
+static struct relocs relocs64;
+#define FMT PRIu64
+
+struct section {
+	Elf_Shdr	shdr;		/* header fields converted by read_shdrs() */
+	struct section	*link;		/* &secs[sh_link] when sh_link < shnum */
+	Elf_Rel		*reltab;	/* entries loaded by read_relocs() */
+};
+
+static struct section *secs;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val)	(val)
+#define le32_to_cpu(val)	(val)
+#define le64_to_cpu(val)	(val)
+#define be16_to_cpu(val)	bswap_16(val)
+#define be32_to_cpu(val)	bswap_32(val)
+#define be64_to_cpu(val)	bswap_64(val)
+#endif
+
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val)	bswap_16(val)
+#define le32_to_cpu(val)	bswap_32(val)
+#define le64_to_cpu(val)	bswap_64(val)
+#define be16_to_cpu(val)	(val)
+#define be32_to_cpu(val)	(val)
+#define be64_to_cpu(val)	(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	/* The data encoding byte of the ELF header selects the conversion */
+	int lsb = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
+
+	return lsb ? le16_to_cpu(val) : be16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	/* Little-endian files take the le32 path, everything else be32 */
+	int lsb = ehdr.e_ident[EI_DATA] == ELFDATA2LSB;
+
+	return lsb ? le32_to_cpu(val) : be32_to_cpu(val);
+}
+
+#define elf_half_to_cpu(x)	elf16_to_cpu(x)
+#define elf_word_to_cpu(x)	elf32_to_cpu(x)
+
+static uint64_t elf64_to_cpu(uint64_t val)
+{
+	return be64_to_cpu(val);	/* read_ehdr() rejects files that are not ELFDATA2MSB */
+}
+
+#define elf_addr_to_cpu(x)	elf64_to_cpu(x)
+#define elf_off_to_cpu(x)	elf64_to_cpu(x)
+#define elf_xword_to_cpu(x)	elf64_to_cpu(x)
+
+static void die(char *fmt, ...)
+{
+	va_list args;
+	/* Print the formatted message on stderr, then exit with status 1 */
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	exit(1);
+}
+
+static void read_ehdr(FILE *fp)
+{
+	if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+		die("Cannot read ELF header: %s\n", strerror(errno));
+	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
+		die("No ELF magic\n");
+	if (ehdr.e_ident[EI_CLASS] != ELF_CLASS)
+		die("Not a %d bit executable\n", ELF_BITS);
+	if (ehdr.e_ident[EI_DATA] != ELF_ENDIAN)
+		die("ELF endian mismatch\n");
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT)
+		die("Unknown ELF version\n");
+
+	/* Convert the header fields to host byte order, in place */
+	ehdr.e_type	 = elf_half_to_cpu(ehdr.e_type);
+	ehdr.e_machine	 = elf_half_to_cpu(ehdr.e_machine);
+	ehdr.e_version	 = elf_word_to_cpu(ehdr.e_version);
+	ehdr.e_entry	 = elf_addr_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff	 = elf_off_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff	 = elf_off_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags	 = elf_word_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize	 = elf_half_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf_half_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum	 = elf_half_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf_half_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum	 = elf_half_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf_half_to_cpu(ehdr.e_shstrndx);
+
+	shnum = ehdr.e_shnum;
+	shstrndx = ehdr.e_shstrndx;
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN))
+		die("Unsupported ELF header type\n");
+	if (ehdr.e_machine != ELF_MACHINE)
+		die("Not for %s\n", ELF_MACHINE_NAME);
+	if (ehdr.e_version != EV_CURRENT)
+		die("Unknown ELF version\n");
+	if (ehdr.e_ehsize != sizeof(Elf_Ehdr))
+		die("Bad Elf header size\n");
+	if (ehdr.e_phentsize != sizeof(Elf_Phdr))
+		die("Bad program header entry\n");
+	if (ehdr.e_shentsize != sizeof(Elf_Shdr))
+		die("Bad section header entry\n");
+
+	if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) {
+		Elf_Shdr shdr;	/* first section header, holds the real values */
+
+		if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+			die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno));
+
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot read initial ELF section header: %s\n", strerror(errno));
+
+		if (shnum == SHN_UNDEF)
+			shnum = elf_xword_to_cpu(shdr.sh_size);
+
+		if (shstrndx == SHN_XINDEX)
+			shstrndx = elf_word_to_cpu(shdr.sh_link);
+	}
+
+	if (shstrndx >= shnum)
+		die("String table index out of bounds\n");
+}
+
+/* Load all section headers into secs[], converted to host byte order. */
+static void read_shdrs(FILE *fp)
+{
+	Elf_Shdr shdr;
+	unsigned long i;
+
+	secs = calloc(shnum, sizeof(*secs));
+	if (!secs)
+		die("Unable to allocate %lu section headers\n", shnum);
+
+	if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+		die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno));
+
+	for (i = 0; i < shnum; i++) {
+		struct section *sec = &secs[i];
+
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+			die("Cannot read ELF section headers %lu/%lu: %s\n",
+			    i, shnum, strerror(errno));
+
+		sec->shdr.sh_name      = elf_word_to_cpu(shdr.sh_name);
+		sec->shdr.sh_type      = elf_word_to_cpu(shdr.sh_type);
+		sec->shdr.sh_flags     = elf_xword_to_cpu(shdr.sh_flags);
+		sec->shdr.sh_addr      = elf_addr_to_cpu(shdr.sh_addr);
+		sec->shdr.sh_offset    = elf_off_to_cpu(shdr.sh_offset);
+		sec->shdr.sh_size      = elf_xword_to_cpu(shdr.sh_size);
+		sec->shdr.sh_link      = elf_word_to_cpu(shdr.sh_link);
+		sec->shdr.sh_info      = elf_word_to_cpu(shdr.sh_info);
+		sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign);
+		sec->shdr.sh_entsize   = elf_xword_to_cpu(shdr.sh_entsize);
+
+		/* Resolve sh_link only when it names an existing section */
+		if (sec->shdr.sh_link < shnum)
+			sec->link = &secs[sec->shdr.sh_link];
+	}
+}
+
+static void read_relocs(FILE *fp)
+{
+	unsigned long i, j;
+
+	for (i = 0; i < shnum; i++) {
+		struct section *sec = &secs[i];
+
+		/* Only relocation sections carry a table to load */
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+		sec->reltab = malloc(sec->shdr.sh_size);
+		if (!sec->reltab)
+			die("malloc of %" FMT " bytes for relocs failed\n", sec->shdr.sh_size);
+
+		if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0)
+			die("Seek to %" FMT " failed: %s\n", sec->shdr.sh_offset, strerror(errno));
+
+		if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size)
+			die("Cannot read relocation table: %s\n", strerror(errno));
+		/* Convert every entry to host byte order in place */
+		for (j = 0; j < sec->shdr.sh_size / sizeof(Elf_Rel); j++) {
+			Elf_Rel *rel = &sec->reltab[j];
+
+			rel->r_offset = elf_addr_to_cpu(rel->r_offset);
+			rel->r_info   = elf_xword_to_cpu(rel->r_info);
+#if (SHT_REL_TYPE == SHT_RELA)
+			rel->r_addend = elf_xword_to_cpu(rel->r_addend);
+#endif
+		}
+	}
+}
+
+/*
+ * Append @offset to the relocation list @r.
+ *
+ * When the list is full (count has reached size) the backing array is first
+ * enlarged by 50000 entries with realloc(); a failed realloc() is reported
+ * through die().
+ */
+static void add_reloc(struct relocs *r, uint32_t offset)
+{
+	if (r->count == r->size) {
+		unsigned long capacity = r->size + 50000;
+		void *grown = realloc(r->offset, sizeof(*r->offset) * capacity);
+
+		if (grown == NULL)
+			die("realloc of %ld entries for relocs failed\n", capacity);
+
+		r->size = capacity;
+		r->offset = grown;
+	}
+
+	r->offset[r->count] = offset;
+	r->count++;
+}
+
+/*
+ * Handle a single relocation entry.
+ *
+ * Only R_390_64 entries are recorded: their r_offset is appended to the
+ * relocs64 list. The other relocation types listed below are accepted and
+ * ignored; any type that is not listed is reported through die().
+ *
+ * add_reloc() takes a 32-bit offset, so an r_offset that does not fit in
+ * 32 bits is rejected instead of being silently truncated.
+ *
+ * @sec is currently unused. Always returns 0.
+ */
+static int do_reloc(struct section *sec, Elf_Rel *rel)
+{
+	unsigned int r_type = ELF64_R_TYPE(rel->r_info);
+	ElfW(Addr) offset = rel->r_offset;
+
+	switch (r_type) {
+	case R_390_NONE:
+	case R_390_PC32:
+	case R_390_PC64:
+	case R_390_PC16DBL:
+	case R_390_PC32DBL:
+	case R_390_PLT32DBL:
+	case R_390_GOTENT:
+	case R_390_GOTPCDBL:
+	case R_390_GOTOFF64:
+		/* Nothing is recorded for these types */
+		break;
+	case R_390_64:
+		if ((uint32_t)offset != offset)
+			die("Relocation offset doesn't fit in 32 bits\n");
+		add_reloc(&relocs64, offset);
+		break;
+	default:
+		die("Unsupported relocation type: %u\n", r_type);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Pass every relocation that applies to an allocated section to do_reloc().
+ *
+ * Relocation sections (SHT_REL_TYPE) whose target section - the one indexed
+ * by sh_info - lacks SHF_ALLOC are skipped. An sh_info value outside the
+ * section table is reported through die() rather than being used to index
+ * past the end of secs[].
+ */
+static void walk_relocs(void)
+{
+	int i;
+
+	/* Walk through the relocations */
+	for (i = 0; i < shnum; i++) {
+		struct section *sec = &secs[i];
+		struct section *sec_applies;
+		size_t nr_rels, j;
+
+		if (sec->shdr.sh_type != SHT_REL_TYPE)
+			continue;
+
+		if (sec->shdr.sh_info >= shnum)
+			die("Relocation section %d has an out of bounds sh_info\n", i);
+
+		sec_applies = &secs[sec->shdr.sh_info];
+		if (!(sec_applies->shdr.sh_flags & SHF_ALLOC))
+			continue;
+
+		nr_rels = sec->shdr.sh_size / sizeof(Elf_Rel);
+		for (j = 0; j < nr_rels; j++) {
+			/* do_reloc() always returns 0, so there is no status to check */
+			do_reloc(sec, &sec->reltab[j]);
+		}
+	}
+}
+
+/*
+ * qsort() comparison callback for two uint32_t values.
+ *
+ * Returns -1, 0 or 1 when the value behind @va is smaller than, equal to or
+ * greater than the value behind @vb.
+ */
+static int cmp_relocs(const void *va, const void *vb)
+{
+	uint32_t lhs = *(const uint32_t *)va;
+	uint32_t rhs = *(const uint32_t *)vb;
+
+	if (lhs < rhs)
+		return -1;
+	return lhs > rhs;
+}
+
+/*
+ * Sort the offsets collected in @r into ascending order.
+ *
+ * Lists with fewer than two entries are already sorted. Returning early
+ * also avoids handing qsort() an array pointer that may never have been
+ * allocated, which the C standard does not permit even for a zero count.
+ */
+static void sort_relocs(struct relocs *r)
+{
+	if (r->count < 2)
+		return;
+
+	qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+}
+
+/*
+ * Write @v to stdout as a ".long" directive, zero-padded to at least eight
+ * hexadecimal digits.
+ *
+ * Returns 0 when fprintf() reports a positive character count, -1 otherwise.
+ */
+static int print_reloc(uint32_t v)
+{
+	int written = fprintf(stdout, "\t.long 0x%08"PRIx32"\n", v);
+
+	if (written <= 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Print the relocs64 list to stdout as an assembler section.
+ *
+ * walk_relocs() collects the entries, sort_relocs() orders them, then the
+ * ".vmlinux.relocs_64" section directive is written followed by one
+ * print_reloc() line per entry. The status returned by print_reloc() is
+ * not checked.
+ */
+static void emit_relocs(void)
+{
+	int idx = 0;
+
+	walk_relocs();
+	sort_relocs(&relocs64);
+
+	fputs(".section \".vmlinux.relocs_64\",\"a\"\n", stdout);
+	while (idx < relocs64.count) {
+		print_reloc(relocs64.offset[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Run the complete extraction on the already opened file @fp.
+ *
+ * The call order matters: read_shdrs() relies on ehdr and shnum being set
+ * up beforehand, read_relocs() relies on the secs[] array that read_shdrs()
+ * fills, and emit_relocs() works on the relocation tables loaded by
+ * read_relocs().
+ */
 static void process(FILE *fp)
 {
 	read_ehdr(fp);
 	read_shdrs(fp);
 	read_relocs(fp);
 	emit_relocs();
 }
+
+/* Report the expected command line ("relocs vmlinux") through die(). */
 static void usage(void)
 {
 	die("relocs vmlinux\n");
 }
+
+/*
+ * Entry point, invoked as "relocs vmlinux".
+ *
+ * Requires exactly one argument, the file to process; anything else goes to
+ * usage(). The file is opened, its first EI_NIDENT bytes are read to confirm
+ * that it is readable and at least that long, the stream is rewound and then
+ * handed to process(). Open and read failures are reported through die().
+ *
+ * Returns 0 once process() has returned.
+ */
+int main(int argc, char **argv)
+{
+	unsigned char e_ident[EI_NIDENT];
+	const char *fname;
+	FILE *fp;
+
+	if (argc != 2)
+		usage();
+
+	fname = argv[1];
+
+	fp = fopen(fname, "r");
+	if (!fp)
+		die("Cannot open %s: %s\n", fname, strerror(errno));
+
+	/* The identification bytes themselves are not inspected here */
+	if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT)
+		die("Cannot read %s: %s\n", fname, strerror(errno));
+
+	rewind(fp);
+
+	process(fp);
+
+	fclose(fp);
+	return 0;
+}