aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig6
-rw-r--r--arch/s390/Makefile4
-rw-r--r--arch/s390/boot/Makefile3
-rw-r--r--arch/s390/boot/startup.c11
-rw-r--r--arch/s390/boot/version.c1
-rw-r--r--arch/s390/boot/vmlinux.lds.S13
-rw-r--r--arch/s390/configs/btf.config1
-rw-r--r--arch/s390/configs/debug_defconfig65
-rw-r--r--arch/s390/configs/defconfig67
-rw-r--r--arch/s390/configs/kasan.config3
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/crypto/Kconfig135
-rw-r--r--arch/s390/include/asm/abs_lowcore.h17
-rw-r--r--arch/s390/include/asm/ccwdev.h1
-rw-r--r--arch/s390/include/asm/ctl_reg.h3
-rw-r--r--arch/s390/include/asm/futex.h3
-rw-r--r--arch/s390/include/asm/lowcore.h4
-rw-r--r--arch/s390/include/asm/maccess.h17
-rw-r--r--arch/s390/include/asm/os_info.h14
-rw-r--r--arch/s390/include/asm/pai.h6
-rw-r--r--arch/s390/include/asm/pci.h1
-rw-r--r--arch/s390/include/asm/pgtable.h4
-rw-r--r--arch/s390/include/asm/processor.h20
-rw-r--r--arch/s390/include/asm/scsw.h5
-rw-r--r--arch/s390/include/asm/smp.h4
-rw-r--r--arch/s390/include/asm/termios.h26
-rw-r--r--arch/s390/include/uapi/asm/dasd.h14
-rw-r--r--arch/s390/include/uapi/asm/termios.h50
-rw-r--r--arch/s390/kernel/Makefile8
-rw-r--r--arch/s390/kernel/abs_lowcore.c95
-rw-r--r--arch/s390/kernel/crash_dump.c40
-rw-r--r--arch/s390/kernel/debug.c2
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/ipl.c9
-rw-r--r--arch/s390/kernel/machine_kexec.c8
-rw-r--r--arch/s390/kernel/os_info.c10
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c1
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c1
-rw-r--r--arch/s390/kernel/perf_pai_ext.c672
-rw-r--r--arch/s390/kernel/process.c4
-rw-r--r--arch/s390/kernel/setup.c47
-rw-r--r--arch/s390/kernel/smp.c97
-rw-r--r--arch/s390/kernel/vdso.c5
-rw-r--r--arch/s390/kvm/gaccess.c16
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c32
-rw-r--r--arch/s390/kvm/kvm-s390.h1
-rw-r--r--arch/s390/kvm/pci.c22
-rw-r--r--arch/s390/kvm/pci.h6
-rw-r--r--arch/s390/lib/delay.c11
-rw-r--r--arch/s390/lib/uaccess.c9
-rw-r--r--arch/s390/mm/dump_pagetables.c20
-rw-r--r--arch/s390/mm/fault.c15
-rw-r--r--arch/s390/mm/gmap.c6
-rw-r--r--arch/s390/mm/hugetlbpage.c10
-rw-r--r--arch/s390/mm/init.c2
-rw-r--r--arch/s390/mm/maccess.c175
-rw-r--r--arch/s390/mm/mmap.c2
-rw-r--r--arch/s390/mm/vmem.c104
-rw-r--r--arch/s390/pci/pci_dma.c2
-rw-r--r--arch/s390/pci/pci_mmio.c8
61 files changed, 1478 insertions, 465 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 318fce77601d..de575af02ffe 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -568,8 +568,7 @@ config EXPOLINE_FULL
endchoice
config RELOCATABLE
- bool "Build a relocatable kernel"
- default y
+ def_bool y
help
This builds a kernel image that retains relocation information
so it can be loaded at an arbitrary address.
@@ -578,10 +577,11 @@ config RELOCATABLE
bootup process.
The relocations make the kernel image about 15% larger (compressed
10%), but are discarded at runtime.
+ Note: this option exists only for documentation purposes; please do
+ not remove it.
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image (KASLR)"
- depends on RELOCATABLE
default y
help
In support of Kernel Address Space Layout Randomization (KASLR),
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 4cb5d17e7ead..b3235ab0ace8 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,10 +14,8 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
-ifeq ($(CONFIG_RELOCATABLE),y)
KBUILD_CFLAGS += -fPIE
LDFLAGS_vmlinux := -pie
-endif
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
@@ -119,8 +117,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR
OBJCOPYFLAGS := -O binary
-head-y := arch/s390/kernel/head64.o
-
libs-y += arch/s390/lib/
drivers-y += drivers/s390/
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 883357a211a3..d52c3e2e16bc 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,9 +37,8 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
-obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index bc48fe82d949..47ca3264c023 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -10,11 +10,14 @@
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
+#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned long __bootdata_preserved(__abs_lowcore);
+unsigned long __bootdata_preserved(__memcpy_real_area);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -180,7 +183,10 @@ static void setup_kernel_memory_layout(void)
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
- MODULES_END = vmax;
+ __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
+ __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+ sizeof(struct lowcore));
+ MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
@@ -285,8 +291,7 @@ void startup_kernel(void)
clear_bss_section();
copy_bootdata();
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- handle_relocs(__kaslr_offset);
+ handle_relocs(__kaslr_offset);
if (__kaslr_offset) {
/*
diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c
index d32e58bdda6a..fd32f038777f 100644
--- a/arch/s390/boot/version.c
+++ b/arch/s390/boot/version.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <generated/utsversion.h>
#include <generated/utsrelease.h>
#include <generated/compile.h>
#include "boot.h"
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index af5c6860e0a1..fa9d33b01b85 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -102,8 +102,17 @@ SECTIONS
_compressed_start = .;
*(.vmlinux.bin.compressed)
_compressed_end = .;
- FILL(0xff);
- . = ALIGN(4096);
+ }
+
+#define SB_TRAILER_SIZE 32
+ /* Trailer needed for Secure Boot */
+ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
+ . = ALIGN(4096) - SB_TRAILER_SIZE;
+ .sb.trailer : {
+ QUAD(0)
+ QUAD(0)
+ QUAD(0)
+ QUAD(0x000000207a49504c)
}
_end = .;
diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config
new file mode 100644
index 000000000000..39227b4511af
--- /dev/null
+++ b/arch/s390/configs/btf.config
@@ -0,0 +1 @@
+CONFIG_DEBUG_INFO_BTF=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 2a827002934b..63807bd0b536 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -723,52 +723,42 @@ CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_TI=m
-CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
-CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -779,6 +769,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -797,7 +797,6 @@ CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_HEADERS_INSTALL=y
CONFIG_DEBUG_SECTION_MISMATCH=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index fb780e80e4c8..4f9a98247442 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -707,53 +707,43 @@ CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_HCTR2=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SHA3_256_S390=m
-CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_SM3_GENERIC=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_AES_TI=m
-CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
-CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -764,6 +754,16 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA3_256_S390=m
+CONFIG_CRYPTO_SHA3_512_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -781,7 +781,6 @@ CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
new file mode 100644
index 000000000000..700a8b25c3ff
--- /dev/null
+++ b/arch/s390/configs/kasan.config
@@ -0,0 +1,3 @@
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index a5576b8d4081..5fe9948be644 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -74,7 +74,6 @@ CONFIG_PRINTK_TIME=y
# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
new file mode 100644
index 000000000000..06ee706b0d78
--- /dev/null
+++ b/arch/s390/crypto/Kconfig
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (s390)"
+
+config CRYPTO_CRC32_S390
+ tristate "CRC32c and CRC32"
+ depends on S390
+ select CRYPTO_HASH
+ select CRC32
+ help
+ CRC32c and CRC32 CRC algorithms
+
+ Architecture: s390
+
+ It is available with IBM z13 or later.
+
+config CRYPTO_SHA512_S390
+ tristate "Hash functions: SHA-384 and SHA-512"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z10.
+
+config CRYPTO_SHA1_S390
+ tristate "Hash functions: SHA-1"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash algorithm (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z990.
+
+config CRYPTO_SHA256_S390
+ tristate "Hash functions: SHA-224 and SHA-256"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z9.
+
+config CRYPTO_SHA3_256_S390
+ tristate "Hash functions: SHA3-224 and SHA3-256"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
+
+ Architecture: s390
+
+ It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+ tristate "Hash functions: SHA3-384 and SHA3-512"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
+
+ Architecture: s390
+
+ It is available as of z14.
+
+config CRYPTO_GHASH_S390
+ tristate "Hash functions: GHASH"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ GCM GHASH hash function (NIST SP800-38D)
+
+ Architecture: s390
+
+ It is available as of z196.
+
+config CRYPTO_AES_S390
+ tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ help
+ Block cipher: AES cipher algorithms (FIPS 197)
+ AEAD cipher: AES with GCM
+ Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes
+
+ Architecture: s390
+
+ As of z9 the ECB and CBC modes are hardware accelerated
+ for 128 bit keys.
+
+ As of z10 the ECB and CBC modes are hardware accelerated
+ for all AES key sizes.
+
+ As of z196 the CTR mode is hardware accelerated for all AES
+ key sizes and XTS mode is hardware accelerated for 256 and
+ 512 bit keys.
+
+config CRYPTO_DES_S390
+ tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_DES
+ help
+ Block ciphers: DES (FIPS 46-2) cipher algorithm
+ Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm
+ Length-preserving ciphers: DES with ECB, CBC, and CTR modes
+ Length-preserving ciphers: Triple DES EDE with ECB, CBC, and CTR modes
+
+ Architecture: s390
+
+ As of z990 the ECB and CBC modes are hardware accelerated.
+ As of z196 the CTR mode is hardware accelerated.
+
+config CRYPTO_CHACHA_S390
+ tristate "Ciphers: ChaCha20"
+ depends on S390
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ help
+ Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
+
+ Architecture: s390
+
+ It is available as of z13.
+
+endmenu
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
new file mode 100644
index 000000000000..4c61b14ee928
--- /dev/null
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ABS_LOWCORE_H
+#define _ASM_S390_ABS_LOWCORE_H
+
+#include <asm/lowcore.h>
+
+#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
+
+extern unsigned long __abs_lowcore;
+extern bool abs_lowcore_mapped;
+
+struct lowcore *get_abs_lowcore(unsigned long *flags);
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
+void abs_lowcore_unmap(int cpu);
+
+#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index d4e90f2ba77e..bd1596810cc1 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -214,7 +214,6 @@ extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
extern void ccw_device_destroy_console(struct ccw_device *);
extern int ccw_device_enable_console(struct ccw_device *);
extern void ccw_device_wait_idle(struct ccw_device *);
-extern int ccw_device_force_console(struct ccw_device *);
extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
extern void ccw_device_dma_free(struct ccw_device *cdev,
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 267a8f88e143..adf7d8cdac7e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -95,7 +95,8 @@ union ctlreg0 {
Interruption-Filtering Override */
unsigned long : 3;
unsigned long ccc : 1; /* Cryptography counter control */
- unsigned long : 18;
+ unsigned long pec : 1; /* PAI extension control */
+ unsigned long : 17;
unsigned long : 3;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index e08c882dccaa..eaeaeb3ff0be 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -17,7 +17,8 @@
"3: jl 1b\n" \
" lhi %0,0\n" \
"4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
+ EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
: "=d" (ret), "=&d" (oldval), "=&d" (newval), \
"=m" (*uaddr) \
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 26fe5e535728..8aa1f6530a3e 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -203,7 +203,9 @@ struct lowcore {
__u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */
/* Cryptography-counter designation */
__u64 ccd; /* 0x1500 */
- __u8 pad_0x1508[0x1800-0x1508]; /* 0x1508 */
+ /* AI-extension counter designation */
+ __u64 aicd; /* 0x1508 */
+ __u8 pad_0x1510[0x1800-0x1510]; /* 0x1510 */
/* Transaction abort diagnostic block */
struct pgm_tdb pgm_tdb; /* 0x1800 */
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
new file mode 100644
index 000000000000..c7fa838cf6b9
--- /dev/null
+++ b/arch/s390/include/asm/maccess.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_MACCESS_H
+#define __ASM_S390_MACCESS_H
+
+#include <linux/types.h>
+
+struct iov_iter;
+
+extern unsigned long __memcpy_real_area;
+void memcpy_real_init(void);
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
+int memcpy_real(void *dest, unsigned long src, size_t count);
+#ifdef CONFIG_CRASH_DUMP
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+#endif
+
+#endif /* __ASM_S390_MACCESS_H */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 85248d8fee0c..0d1c74a7a650 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -41,20 +41,6 @@ u32 os_info_csum(struct os_info *os_info);
#ifdef CONFIG_CRASH_DUMP
void *os_info_old_entry(int nr, unsigned long *size);
-size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count);
-
-static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
-{
- struct iov_iter iter;
- struct kvec kvec;
-
- kvec.iov_base = dst;
- kvec.iov_len = count;
- iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
- if (copy_oldmem_iter(&iter, src, count) < count)
- return -EFAULT;
- return 0;
-}
#else
static inline void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 5b7e33ac6f0b..1a8a6b15d121 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -17,7 +17,9 @@ struct qpaci_info_block {
struct {
u64 : 8;
u64 num_cc : 8; /* # of supported crypto counters */
- u64 : 48;
+ u64 : 9;
+ u64 num_nnpa : 7; /* # of supported NNPA counters */
+ u64 : 32;
};
};
@@ -42,6 +44,8 @@ static inline int qpaci(struct qpaci_info_block *info)
#define PAI_CRYPTO_BASE 0x1000 /* First event number */
#define PAI_CRYPTO_MAXCTR 256 /* Max # of event counters */
#define PAI_CRYPTO_KERNEL_OFFSET 2048
+#define PAI_NNPA_BASE 0x1800 /* First event number */
+#define PAI_NNPA_MAXCTR 128 /* Max # of event counters */
DECLARE_STATIC_KEY_FALSE(pai_key);
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 7b4cdadbc023..108e732d7b14 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -117,7 +117,6 @@ struct zpci_bus {
struct zpci_dev {
struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
- struct list_head bus_next;
struct kref kref;
struct hotplug_slot hotplug_slot;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f019df19884d..f1cb9391190d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1777,6 +1777,10 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
+extern void vmem_unmap_4k_page(unsigned long addr);
+extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bd66f8e34949..87be3e855bf7 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -186,9 +186,6 @@ struct pt_regs;
void show_registers(struct pt_regs *regs);
void show_cacheinfo(struct seq_file *m);
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *tsk) { }
-
/* Free guarded storage control block */
void guarded_storage_release(struct task_struct *tsk);
void gs_load_bc_cb(struct pt_regs *regs);
@@ -306,23 +303,6 @@ static __always_inline void __noreturn disabled_wait(void)
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
-extern int memcpy_real(void *, unsigned long, size_t);
-extern void memcpy_absolute(void *, void *, size_t);
-
-#define put_abs_lowcore(member, x) do { \
- unsigned long __abs_address = offsetof(struct lowcore, member); \
- __typeof__(((struct lowcore *)0)->member) __tmp = (x); \
- \
- memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \
-} while (0)
-
-#define get_abs_lowcore(x, member) do { \
- unsigned long __abs_address = offsetof(struct lowcore, member); \
- __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \
- \
- memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \
-} while (0)
-
extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 7ce584aff5bb..322bdcd4b616 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -215,6 +215,11 @@ union scsw {
#define SNS2_ENV_DATA_PRESENT 0x10
#define SNS2_INPRECISE_END 0x04
+/*
+ * architected values for PPRC errors
+ */
+#define SNS7_INVALID_ON_SEC 0x0e
+
/**
* scsw_is_tm - check for transport mode scsw
* @scsw: pointer to scsw
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7f5d4763357b..73ed2781073b 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -30,7 +30,8 @@ extern void smp_emergency_stop(void);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
-extern void smp_save_dump_cpus(void);
+extern void smp_save_dump_ipl_cpu(void);
+extern void smp_save_dump_secondary_cpus(void);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
@@ -58,6 +59,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
{
}
+extern int smp_reinit_ipl_cpu(void);
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
deleted file mode 100644
index 46fa3020b41e..000000000000
--- a/arch/s390/include/asm/termios.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/termios.h"
- */
-#ifndef _S390_TERMIOS_H
-#define _S390_TERMIOS_H
-
-#include <uapi/asm/termios.h>
-
-
-/* intr=^C quit=^\ erase=del kill=^U
- eof=^D vtime=\0 vmin=\1 sxtc=\0
- start=^Q stop=^S susp=^Z eol=\0
- reprint=^R discard=^U werase=^W lnext=^V
- eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-
-#include <asm-generic/termios-base.h>
-
-#endif /* _S390_TERMIOS_H */
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 9ec86fae9980..93d1ccd3304c 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -183,6 +183,18 @@ typedef struct format_data_t {
} format_data_t;
/*
+ * struct dasd_copypair_swap_data_t
+ * represents all data necessary to issue a swap of the copy pair relation
+ */
+struct dasd_copypair_swap_data_t {
+ char primary[20]; /* BUSID of primary */
+ char secondary[20]; /* BUSID of secondary */
+
+ /* Reserved for future updates. */
+ __u8 reserved[64];
+};
+
+/*
* values to be used for format_data_t.intensity
* 0/8: normal format
* 1/9: also write record zero
@@ -326,6 +338,8 @@ struct dasd_snid_ioctl_data {
#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
/* Release Allocated Space */
#define BIODASDRAS _IOW(DASD_IOCTL_LETTER, 3, format_data_t)
+/* Swap copy pair relation */
+#define BIODASDCOPYPAIRSWAP _IOW(DASD_IOCTL_LETTER, 4, struct dasd_copypair_swap_data_t)
/* Get Sense Path Group ID (SNID) data */
#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
deleted file mode 100644
index 54223169c806..000000000000
--- a/arch/s390/include/uapi/asm/termios.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/termios.h"
- */
-
-#ifndef _UAPI_S390_TERMIOS_H
-#define _UAPI_S390_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
- unsigned short c_iflag; /* input mode flags */
- unsigned short c_oflag; /* output mode flags */
- unsigned short c_cflag; /* control mode flags */
- unsigned short c_lflag; /* local mode flags */
- unsigned char c_line; /* line discipline */
- unsigned char c_cc[NCC]; /* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE 0x001
-#define TIOCM_DTR 0x002
-#define TIOCM_RTS 0x004
-#define TIOCM_ST 0x008
-#define TIOCM_SR 0x010
-#define TIOCM_CTS 0x020
-#define TIOCM_CAR 0x040
-#define TIOCM_RNG 0x080
-#define TIOCM_DSR 0x100
-#define TIOCM_CD TIOCM_CAR
-#define TIOCM_RI TIOCM_RNG
-#define TIOCM_OUT1 0x2000
-#define TIOCM_OUT2 0x4000
-#define TIOCM_LOOP 0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-
-#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3cbfa9fddd9a..5e6a23299790 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,16 +33,16 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
-extra-y += head64.o vmlinux.lds
+extra-y += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
@@ -72,7 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..fb92e8ed0525
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+
+#define ABS_LOWCORE_UNMAPPED 1
+#define ABS_LOWCORE_LAP_ON 2
+#define ABS_LOWCORE_IRQS_ON 4
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+bool __ro_after_init abs_lowcore_mapped;
+
+/*
+ * Map the LC_PAGES pages backing @lc into the slot of the absolute lowcore
+ * area (starting at __abs_lowcore) that belongs to @cpu.
+ *
+ * @cpu:   slot index, in units of sizeof(struct lowcore)
+ * @lc:    lowcore whose physical pages get mapped
+ * @alloc: handed to __vmem_map_4k_page(); also decides whether a partially
+ *         established mapping is rolled back when a page fails to map
+ *
+ * Returns 0 on success, otherwise the error from __vmem_map_4k_page().
+ */
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+	unsigned long vaddr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	unsigned long paddr = __pa(lc);
+	int page, rc;
+
+	for (page = 0; page < LC_PAGES; page++) {
+		rc = __vmem_map_4k_page(vaddr, paddr, PAGE_KERNEL, alloc);
+		if (rc)
+			goto rollback;
+		vaddr += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+	return 0;
+
+rollback:
+	/*
+	 * Do not unmap allocated page tables in case the allocation was
+	 * not requested. In such a case the request is expected to come
+	 * from an atomic context, while the unmap attempt might sleep.
+	 */
+	if (!alloc)
+		return rc;
+	/* Undo the pages mapped so far, last one first. */
+	while (page-- > 0) {
+		vaddr -= PAGE_SIZE;
+		vmem_unmap_4k_page(vaddr);
+	}
+	return rc;
+}
+
+/* Remove the LC_PAGES page mappings of the absolute lowcore slot of @cpu. */
+void abs_lowcore_unmap(int cpu)
+{
+	unsigned long base = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	int page;
+
+	for (page = 0; page < LC_PAGES; page++)
+		vmem_unmap_4k_page(base + page * PAGE_SIZE);
+}
+
+/*
+ * Return a pointer through which the absolute lowcore of the current CPU
+ * can be accessed.
+ *
+ * @flags: output; receives the ABS_LOWCORE_* state bits that must be passed
+ *         unchanged to put_abs_lowcore(). Stays 0 when the mapped absolute
+ *         lowcore is used.
+ *
+ * The CPU is pinned with get_cpu(); the matching put_cpu() is done by
+ * put_abs_lowcore(), so every call must be paired with it.
+ */
+struct lowcore *get_abs_lowcore(unsigned long *flags)
+{
+ unsigned long irq_flags;
+ union ctlreg0 cr0;
+ int cpu;
+
+ *flags = 0;
+ cpu = get_cpu();
+ if (abs_lowcore_mapped) {
+ /* Mapped case: each CPU has its own slot behind __abs_lowcore */
+ return ((struct lowcore *)__abs_lowcore) + cpu;
+ } else {
+ /* Unmapped case: only CPU 0 may go through lowcore_ptr[0] */
+ if (cpu != 0)
+ panic("Invalid unmapped absolute lowcore access\n");
+ local_irq_save(irq_flags);
+ /* Remember whether interrupts were on so they can be re-enabled */
+ if (!irqs_disabled_flags(irq_flags))
+ *flags |= ABS_LOWCORE_IRQS_ON;
+ __ctl_store(cr0.val, 0, 0);
+ /*
+ * If the lap bit of control register 0 is set, clear bit 28 for
+ * the duration of the access and record that it must be restored.
+ * NOTE(review): lap is presumably low-address protection - confirm.
+ */
+ if (cr0.lap) {
+ *flags |= ABS_LOWCORE_LAP_ON;
+ __ctl_clear_bit(0, 28);
+ }
+ *flags |= ABS_LOWCORE_UNMAPPED;
+ return lowcore_ptr[0];
+ }
+}
+
+/*
+ * Release an absolute lowcore pointer obtained with get_abs_lowcore().
+ *
+ * @lc:    pointer returned by get_abs_lowcore() (not used by this function)
+ * @flags: state bits filled in by get_abs_lowcore()
+ *
+ * Restores control register 0 bit 28 and the interrupt state if they were
+ * changed, then drops the CPU pin taken by get_cpu(). Panics when @flags
+ * does not match the current mapped/unmapped state.
+ */
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
+{
+ if (abs_lowcore_mapped) {
+ /* The mapped path of get_abs_lowcore() never sets any flag */
+ if (flags)
+ panic("Invalid mapped absolute lowcore release\n");
+ } else {
+ /*
+ * NOTE(review): this branch handles the unmapped case, yet the
+ * message below says "mapped" - confirm the wording is intended.
+ */
+ if (smp_processor_id() != 0)
+ panic("Invalid mapped absolute lowcore access\n");
+ if (!(flags & ABS_LOWCORE_UNMAPPED))
+ panic("Invalid unmapped absolute lowcore release\n");
+ /* Undo the changes in reverse order of get_abs_lowcore() */
+ if (flags & ABS_LOWCORE_LAP_ON)
+ __ctl_set_bit(0, 28);
+ if (flags & ABS_LOWCORE_IRQS_ON)
+ local_irq_enable();
+ }
+ put_cpu();
+}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index bad8f47fc5d6..dd74fe664ed1 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/ipl.h>
#include <asm/sclp.h>
+#include <asm/maccess.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -53,8 +54,6 @@ struct save_area {
};
static LIST_HEAD(dump_save_areas);
-static DEFINE_MUTEX(memcpy_real_mutex);
-static char memcpy_real_buf[PAGE_SIZE];
/*
* Allocate a save area
@@ -116,27 +115,7 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
-static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count)
-{
- size_t len, copied, res = 0;
-
- mutex_lock(&memcpy_real_mutex);
- while (count) {
- len = min(PAGE_SIZE, count);
- if (memcpy_real(memcpy_real_buf, src, len))
- break;
- copied = copy_to_iter(memcpy_real_buf, len, iter);
- count -= copied;
- src += copied;
- res += copied;
- if (copied < len)
- break;
- }
- mutex_unlock(&memcpy_real_mutex);
- return res;
-}
-
-size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
size_t len, copied, res = 0;
@@ -156,7 +135,7 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
} else {
len = count;
}
- copied = copy_to_iter_real(iter, src, len);
+ copied = memcpy_real_iter(iter, src, len);
}
count -= copied;
src += copied;
@@ -167,6 +146,19 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
return res;
}
+/*
+ * Copy @count bytes from oldmem address @src into the kernel buffer @dst.
+ * Returns 0 when everything was copied, -EFAULT on a short copy.
+ */
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+{
+	struct kvec vec = {
+		.iov_base = dst,
+		.iov_len = count,
+	};
+	struct iov_iter to;
+	size_t done;
+
+	/* Wrap the destination buffer so the iterator based copy can be used */
+	iov_iter_kvec(&to, WRITE, &vec, 1, count);
+	done = copy_oldmem_iter(&to, src, count);
+	return done < count ? -EFAULT : 0;
+}
+
/*
* Copy one page from "oldmem"
*/
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4331c7e6e1c0..d7a82066a638 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -250,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strlcpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name, sizeof(rc->name));
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 432c8c987256..6030fdd6997b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -267,7 +267,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
/* copy arch command line */
- strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
}
static void __init check_image_bootable(void)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1cc85b8ff42e..325cbf69ebbd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -29,6 +29,7 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
@@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- put_abs_lowcore(ipib, ipib);
- put_abs_lowcore(ipib_checksum, csum);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->ipib = ipib;
+ abs_lc->ipib_checksum = csum;
+ put_abs_lowcore(abs_lc, flags);
dump_run(trigger);
}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ab761c008f98..4579b42286d5 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
@@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ struct lowcore *abs_lc;
+ unsigned long flags;
+
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+ put_abs_lowcore(abs_lc, flags);
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 1acc2e05d70f..ec0bd9457e90 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,8 +13,9 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/maccess.h>
#include <asm/asm-offsets.h>
/*
@@ -57,13 +58,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
- void *ptr = &os_info;
+ struct lowcore *abs_lc;
+ unsigned long flags;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
- put_abs_lowcore(os_info, __pa(ptr));
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->os_info = __pa(&os_info);
+ put_abs_lowcore(abs_lc, flags);
}
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index f7dd3c849e68..f043a7ff220b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
raw.frag.data = cpuhw->stop;
raw.size = raw.frag.size;
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index b38b4ae01589..6826e2a69a21 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void)
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 000000000000..74b53c531e0c
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_ext"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+
+#include <asm/cpu_mcf.h>
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
+#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
+
+/* Usage mode of a per CPU paiext_map; chosen in paiext_alloc() from the
+ * sample_period of the first event using the map.
+ */
+enum paiext_mode {
+ PAI_MODE_NONE,
+ PAI_MODE_SAMPLING,
+ PAI_MODE_COUNTER,
+};
+
+/* One entry of the raw sample data built by paiext_copy(). */
+struct pai_userdata {
+ u16 num; /* Counter number */
+ u64 value; /* Counter value */
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb { /* PAI extension 1 control block */
+ u64 header; /* Not used */
+ u64 reserved1;
+ u64 acc; /* Addr to analytics counter control block */
+ u8 reserved2[488]; /* Pads the block to 512 bytes (PAIE1_CB_SZ) */
+} __packed;
+
+/* Per CPU bookkeeping, allocated by paiext_alloc() for the first event on
+ * a CPU and released by paiext_free() when its refcnt drops to zero.
+ */
+struct paiext_map {
+ unsigned long *area; /* Area for CPU to store counters */
+ struct pai_userdata *save; /* Area to store non-zero counters */
+ enum paiext_mode mode; /* Type of event */
+ unsigned int active_events; /* # of PAI Extension users */
+ unsigned int refcnt; /* # of events referencing this map */
+ struct perf_event *event; /* Perf event for sampling */
+ struct paiext_cb *paiext_cb; /* PAI extension control block area */
+};
+
+/* Per CPU slot; mapptr is NULL while no paiext_map exists for that CPU. */
+struct paiext_mapptr {
+ struct paiext_map *mapptr;
+};
+
+/* The per CPU array is allocated by paiext_root_alloc() for the first event
+ * and released by paiext_root_free() when refcnt drops to zero.
+ */
+static struct paiext_root { /* Anchor to per CPU data */
+ int refcnt; /* Overall active events */
+ struct paiext_mapptr __percpu *mapptr;
+} paiext_root;
+
+/* Drop one reference on the anchor. When the last event is gone, release
+ * the per CPU data.
+ */
+static void paiext_root_free(void)
+{
+	paiext_root.refcnt--;
+	if (paiext_root.refcnt)
+		return;
+	free_percpu(paiext_root.mapptr);
+	paiext_root.mapptr = NULL;
+}
+
+/* Take one reference on the anchor. The first event also allocates the
+ * per CPU data dynamically: an array of pointers sized for the maximum
+ * number of possible CPUs, which might be larger than the number of CPUs
+ * currently online.
+ *
+ * Returns 0 on success and -ENOMEM when the per CPU data can not be
+ * allocated.
+ */
+static int paiext_root_alloc(void)
+{
+	paiext_root.refcnt++;
+	if (paiext_root.refcnt != 1)
+		return 0;
+
+	/* The memory is already zeroed. */
+	paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+	/* Returning without refcnt adjustment on failure is ok. The error
+	 * code is handled by paiext_alloc() which decrements refcnt when
+	 * an event can not be created.
+	 */
+	return paiext_root.mapptr ? 0 : -ENOMEM;
+}
+
+/* Serializes paiext_alloc() and paiext_event_destroy(): the reference
+ * counters of the anchor and of the per CPU maps are only changed while
+ * this mutex is held, and the check that prohibits concurrent counting
+ * and sampling events on a CPU is done under it.
+ * Ensures that the per CPU counter areas are deallocated only when no
+ * sampling or counting event references them any more.
+ * For details see paiext_alloc().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Release the counter area, the control block, the save area and the map
+ * itself, then mark the per CPU slot as empty.
+ */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+	struct paiext_map *map = mp->mapptr;
+
+	kfree(map->area);
+	kfree(map->paiext_cb);
+	kvfree(map->save);
+	kfree(map);
+	mp->mapptr = NULL;
+}
+
+/* Release the PMU if event is the last perf event.
+ *
+ * Drops the reference this event holds on the map of its CPU (freeing the
+ * map with the last reference) and the reference on the anchor.
+ */
+static void paiext_event_destroy(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_map *remaining;
+
+	mutex_lock(&paiext_reserve_mutex);
+	cpump->event = NULL;
+	if (!--cpump->refcnt)		/* Last reference gone */
+		paiext_free(mp);
+	/* Take the value for the debug message now: paiext_root_free() may
+	 * release the per CPU area mp points into, so mp must not be
+	 * dereferenced afterwards.
+	 */
+	remaining = mp->mapptr;
+	paiext_root_free();
+	mutex_unlock(&paiext_reserve_mutex);
+	debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
+			    event->cpu, remaining);
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails and -EBUSY when
+ * the requested mode conflicts with the events already using this CPU.
+ */
+static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+{
+ struct paiext_mapptr *mp;
+ struct paiext_map *cpump;
+ int rc;
+
+ mutex_lock(&paiext_reserve_mutex);
+
+ rc = paiext_root_alloc();
+ if (rc)
+ goto unlock;
+
+ mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paiext_map allocated? */
+ rc = -ENOMEM;
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump)
+ goto unlock;
+
+ /* Allocate memory for counter area and counter extraction.
+ * These are
+ * - a 512 byte block and requires 512 byte boundary alignment.
+ * - a 1KB byte block and requires 1KB boundary alignment.
+ * Only the first counting event has to allocate the area.
+ *
+ * Note: This works with commit 59bb47985c1d by default.
+ * Backporting this to kernels without this commit might
+ * need adjustment.
+ */
+ mp->mapptr = cpump;
+ cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+ cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+ cpump->save = kvmalloc_array(paiext_cnt + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ /* paiext_free() releases whatever was allocated and resets
+ * mp->mapptr, rc is still -ENOMEM here.
+ */
+ if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+ paiext_free(mp);
+ goto unlock;
+ }
+ cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
+ : PAI_MODE_COUNTER;
+ } else {
+ /* Multiple invocation, check what is active.
+ * Supported are multiple counter events or only one sampling
+ * event concurrently at any one time.
+ */
+ if (cpump->mode == PAI_MODE_SAMPLING ||
+ (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) {
+ rc = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ rc = 0;
+ cpump->event = event;
+ ++cpump->refcnt;
+
+unlock:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+ * the event is not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ paiext_root_free();
+ }
+ mutex_unlock(&paiext_reserve_mutex);
+ /* If rc is non-zero, no increment of counter/sampler was done. */
+ return rc;
+}
+
+/* Validate the event number: it must lie in the range PAI_NNPA_BASE to
+ * PAI_NNPA_BASE + paiext_cnt (inclusive). For a valid event the offset of
+ * the NNPA entry within the PAI extension 1 control block is stored in
+ * hw.config_base.
+ *
+ * Returns 0 for a valid event number, -EINVAL otherwise.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+	u64 nr = event->attr.config;
+
+	if (nr < PAI_NNPA_BASE || nr > PAI_NNPA_BASE + paiext_cnt)
+		return -EINVAL;
+
+	/* Offset NNPA in paiext_cb */
+	event->hw.config_base = offsetof(struct paiext_cb, acc);
+	return 0;
+}
+
+/* Might be called on different CPU than the one the event is intended for.
+ *
+ * Checks that the event can be handled by this PMU and reserves the per
+ * CPU resources for it. Returns 0 on success, -ENOENT when the event is
+ * not meant for this PMU or is not CPU wide, -EINVAL for an unsupported
+ * event number or attribute combination, or the error of paiext_alloc().
+ */
+static int paiext_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *a = &event->attr;
+ int rc;
+
+ /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* PAI extension event must be valid and in supported range */
+ rc = paiext_event_valid(event);
+ if (rc)
+ return rc;
+ /* Allow only CPU wide operation, no process context for now. */
+ if (event->hw.target || event->cpu == -1)
+ return -ENOENT;
+ /* Allow only event NNPA_ALL for sampling. */
+ if (a->sample_period && a->config != PAI_NNPA_BASE)
+ return -EINVAL;
+ /* Prohibit exclude_user event selection */
+ if (a->exclude_user)
+ return -EINVAL;
+
+ rc = paiext_alloc(a, event);
+ if (rc)
+ return rc;
+ /* last_tag is used by paiext_start() to initialize the event once */
+ event->hw.last_tag = 0;
+ event->destroy = paiext_event_destroy;
+
+ if (a->sample_period) {
+ /* Sampling always runs with a fixed period of one */
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paiext_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which are the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+
+ return 0;
+}
+
+/* Fetch the value of counter number @nr from the counter area of @cpump. */
+static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+{
+	unsigned long *ctrs = cpump->area;
+
+	return *(ctrs + nr);
+}
+
+/* Read the counter value for @event from the counter area of the current
+ * CPU. Event NNPA_ALL (PAI_NNPA_BASE) yields the sum of the counters
+ * 1 to paiext_cnt, any other event yields its own counter.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *map = mp->mapptr;
+	u64 total = 0;
+	int ctr;
+
+	if (event->attr.config == PAI_NNPA_BASE) {
+		for (ctr = 1; ctr <= paiext_cnt; ctr++)
+			total += paiext_getctr(map, ctr);
+		return total;
+	}
+
+	return paiext_getctr(map, event->attr.config - PAI_NNPA_BASE);
+}
+
+/* Return the current counter value for @event; plain wrapper around
+ * paiext_getdata().
+ */
+static u64 paiext_getall(struct perf_event *event)
+{
+ return paiext_getdata(event);
+}
+
+/* Add the counter increment since the previous read to the event count and
+ * remember the current counter value for the next read.
+ */
+static void paiext_read(struct perf_event *event)
+{
+	u64 before = local64_read(&event->hw.prev_count);
+	u64 now = paiext_getall(event);
+
+	local64_set(&event->hw.prev_count, now);
+	local64_add(now - before, &event->count);
+}
+
+/* Start counting for @event: on the first call record the current counter
+ * value as baseline and reset the event count. Later calls do nothing,
+ * hw.last_tag marks an already started event. @flags is not used.
+ */
+static void paiext_start(struct perf_event *event, int flags)
+{
+	if (!event->hw.last_tag) {
+		event->hw.last_tag = 1;
+		/* Get current value */
+		local64_set(&event->hw.prev_count, paiext_getall(event));
+		local64_set(&event->count, 0);
+	}
+}
+
+/* Add @event on the current CPU. The first active event on the CPU installs
+ * the control block and switches the facility on. Always returns 0.
+ */
+static int paiext_add(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ if (++cpump->active_events == 1) {
+ /* Publish the control block address in the lowcore */
+ S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+ /* NOTE(review): meaning of the low bit set in acc is not visible
+ * here - confirm against the architecture documentation.
+ */
+ pcb->acc = virt_to_phys(cpump->area) | 0x1;
+ /* Enable CPU instruction lookup for PAIE1 control block */
+ __ctl_set_bit(0, 49);
+ debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+ __func__, S390_lowcore.aicd, pcb->acc);
+ }
+ if (flags & PERF_EF_START && !event->attr.sample_period) {
+ /* Only counting needs initial counter value */
+ paiext_start(event, PERF_EF_RELOAD);
+ }
+ event->hw.state = 0;
+ if (event->attr.sample_period) {
+ /* Sampling: remember the event for paiext_push_sample() and
+ * request the sched_task() call back.
+ */
+ cpump->event = event;
+ perf_sched_cb_inc(event->pmu);
+ }
+ return 0;
+}
+
+/* Stop @event: fold the latest counter increment into the event count and
+ * mark the event as stopped. @flags is not used.
+ */
+static void paiext_stop(struct perf_event *event, int flags)
+{
+ paiext_read(event);
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+/* Remove @event from the current CPU. The last active event on the CPU
+ * switches the facility off and removes the control block again.
+ * @flags is not used.
+ */
+static void paiext_del(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *map = mp->mapptr;
+	struct paiext_cb *cb = map->paiext_cb;
+
+	if (event->attr.sample_period) {
+		perf_sched_cb_dec(event->pmu);
+	} else {
+		/* Only counting needs to read counter */
+		paiext_stop(event, PERF_EF_UPDATE);
+	}
+
+	map->active_events--;
+	if (map->active_events)
+		return;
+
+	/* Disable CPU instruction lookup for PAIE1 control block */
+	__ctl_clear_bit(0, 49);
+	cb->acc = 0;
+	S390_lowcore.aicd = 0;
+	debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+			    __func__, S390_lowcore.aicd, cb->acc);
+}
+
+/* Create raw data in the save area of @cpump. Returns the number of bytes
+ * stored. Only counters with a nonzero value are saved, each as
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paiext_copy(struct paiext_map *cpump)
+{
+	struct pai_userdata *out = cpump->save;
+	int nr;
+
+	for (nr = 1; nr <= paiext_cnt; nr++) {
+		u64 val = paiext_getctr(cpump, nr);
+
+		if (!val)
+			continue;
+		out->num = nr;
+		out->value = val;
+		out++;
+	}
+	/* Entries written times the size of one entry */
+	return (out - cpump->save) * sizeof(*out);
+}
+
+/* Write sample when one or more counters values are nonzero.
+ *
+ * Note: The function paiext_sched_task() and paiext_push_sample() are not
+ * invoked after function paiext_del() has been called because of function
+ * perf_sched_cb_dec().
+ * The function paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paiext_sched_task() as call back
+ * to run at context switch time (see paiext_add()).
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed an
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ *
+ * Returns 0 when no counter was incremented, otherwise the return value of
+ * perf_event_overflow().
+ */
+static int paiext_push_sample(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ size_t rawsize;
+ int overflow;
+
+ /* Collect the nonzero counters in cpump->save */
+ rawsize = paiext_copy(cpump);
+ if (!rawsize) /* No incremented counters */
+ return 0;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ /* Fill in only the fields requested through sample_type */
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU)
+ data.cpu_entry.cpu = smp_processor_id();
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ /* Raw data is the list of entries built by paiext_copy() */
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ raw.size = raw.frag.size;
+ data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Clear the counter area after read */
+ memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
+ return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ *
+ * Nothing is done on schedule-in. @ctx is not used.
+ */
+static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	if (sched_in)
+		return;
+
+	/* We started with a clean page on event installation. So read out
+	 * results on schedule_out and if page was dirty, clear values.
+	 */
+	paiext_push_sample();
+}
+
+/* Attribute definitions for pai extension1 interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instructions returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+/* Attributes of the "format" group: only the event attribute. */
+static struct attribute *paiext_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+/* The "events" group; its attribute list is created at init time. */
+static struct attribute_group paiext_events_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+ .name = "format",
+ .attrs = paiext_format_attr,
+};
+
+/* NULL terminated list of attribute groups handed to the PMU. */
+static const struct attribute_group *paiext_attr_groups[] = {
+ &paiext_events_group,
+ &paiext_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters; registered under the
+ * name KMSG_COMPONENT in paiext_init().
+ */
+static struct pmu paiext = {
+ /* NOTE(review): presumably chosen because paiext_event_init() accepts
+ * CPU wide events only - confirm.
+ */
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = paiext_event_init,
+ .add = paiext_add,
+ .del = paiext_del,
+ .start = paiext_start,
+ .stop = paiext_stop,
+ .read = paiext_read,
+ .sched_task = paiext_sched_task,
+ .attr_groups = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. The array index is
+ * the counter number; attr_event_init_one() publishes entry N with the
+ * event number PAI_NNPA_BASE + N.
+ */
+static const char * const paiext_ctrnames[] = {
+ [0] = "NNPA_ALL",
+ [1] = "NNPA_ADD",
+ [2] = "NNPA_SUB",
+ [3] = "NNPA_MUL",
+ [4] = "NNPA_DIV",
+ [5] = "NNPA_MIN",
+ [6] = "NNPA_MAX",
+ [7] = "NNPA_LOG",
+ [8] = "NNPA_EXP",
+ [9] = "NNPA_IBM_RESERVED_9",
+ [10] = "NNPA_RELU",
+ [11] = "NNPA_TANH",
+ [12] = "NNPA_SIGMOID",
+ [13] = "NNPA_SOFTMAX",
+ [14] = "NNPA_BATCHNORM",
+ [15] = "NNPA_MAXPOOL2D",
+ [16] = "NNPA_AVGPOOL2D",
+ [17] = "NNPA_LSTMACT",
+ [18] = "NNPA_GRUACT",
+ [19] = "NNPA_CONVOLUTION",
+ [20] = "NNPA_MATMUL_OP",
+ [21] = "NNPA_MATMUL_OP_BCAST23",
+ [22] = "NNPA_SMALLBATCH",
+ [23] = "NNPA_LARGEDIM",
+ [24] = "NNPA_SMALLTENSOR",
+ [25] = "NNPA_1MFRAME",
+ [26] = "NNPA_2GFRAME",
+ [27] = "NNPA_ACCESSEXCEPT",
+};
+
+/* Free the first @num event attributes referenced by @attrs and the pointer
+ * array itself. Each entry points into a struct perf_pmu_events_attr that
+ * was allocated by attr_event_init_one().
+ */
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+	int idx;
+
+	for (idx = 0; idx < num; idx++) {
+		struct device_attribute *dap;
+
+		/* Walk back from the embedded attribute to its container */
+		dap = container_of(attrs[idx], struct device_attribute, attr);
+		kfree(container_of(dap, struct perf_pmu_events_attr, attr));
+	}
+	kfree(attrs);
+}
+
+/* Allocate and set up the sysfs event attribute for counter number @num and
+ * store it in @attrs[@num]. The attribute is read-only, named after
+ * paiext_ctrnames[@num] and carries the event number PAI_NNPA_BASE + @num.
+ *
+ * Returns 0 on success and -ENOMEM when the allocation fails.
+ */
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = PAI_NNPA_BASE + num;
+ pa->attr.attr.name = paiext_ctrnames[num];
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ attrs[num] = &pa->attr.attr;
+ return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly: one attribute per entry of
+ * paiext_ctrnames[], collected in a NULL terminated array which becomes the
+ * attribute list of paiext_events_group.
+ *
+ * Returns 0 on success and -ENOMEM when an allocation fails. On failure
+ * everything allocated so far is released again.
+ */
+static int __init attr_event_init(void)
+{
+	struct attribute **attrs;
+	int ret, i;
+
+	attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
+			      GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+	for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
+		ret = attr_event_init_one(attrs, i);
+		if (ret) {
+			/* Entries 0 to i - 1 exist; attr_event_free() takes
+			 * the number of entries, not the last index.
+			 */
+			attr_event_free(attrs, i);
+			return ret;
+		}
+	}
+	attrs[i] = NULL;
+	paiext_events_group.attrs = attrs;
+	return 0;
+}
+
+/* Set up the pai_ext PMU: query the number of supported counters, create
+ * the sysfs event attributes, register the s390dbf debug area and finally
+ * register the PMU.
+ *
+ * Returns 0 on success and also when facility 197 is not installed or no
+ * counters are reported (the PMU is then simply not registered). Returns a
+ * negative error code when one of the setup steps fails; steps already
+ * done are undone.
+ */
+static int __init paiext_init(void)
+{
+	struct qpaci_info_block ib;
+	int rc;
+
+	if (!test_facility(197))
+		return 0;
+
+	qpaci(&ib);
+	paiext_cnt = ib.num_nnpa;
+	/* Never use more counters than PAI_NNPA_MAXCTR */
+	if (paiext_cnt >= PAI_NNPA_MAXCTR)
+		paiext_cnt = PAI_NNPA_MAXCTR;
+	if (!paiext_cnt)
+		return 0;
+
+	rc = attr_event_init();
+	if (rc) {
+		pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+		return rc;
+	}
+
+	/* Setup s390dbf facility */
+	paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+	if (!paiext_dbg) {
+		pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+		rc = -ENOMEM;
+		goto out_init;
+	}
+	debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+	rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+	if (rc) {
+		pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
+		       "rc=%i\n", rc);
+		goto out_pmu;
+	}
+
+	return 0;
+
+out_pmu:
+	debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+	debug_unregister(paiext_dbg);
+out_init:
+	/* Exactly ARRAY_SIZE(paiext_ctrnames) attributes were created; the
+	 * trailing NULL terminator is not an attribute and must not be run
+	 * through container_of() by attr_event_free().
+	 */
+	attr_event_free(paiext_events_group.attrs,
+			ARRAY_SIZE(paiext_ctrnames));
+	/* Do not leave a pointer to the freed array behind */
+	paiext_events_group.attrs = NULL;
+	return rc;
+}
+
+device_initcall(paiext_init);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index d5119e039d85..42af4b3aa02b 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -224,13 +224,13 @@ unsigned long __get_wchan(struct task_struct *p)
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= prandom_u32_max(PAGE_SIZE);
return sp & ~0xf;
}
static inline unsigned long brk_rnd(void)
{
- return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index bbd4bde4f65d..ab19ddb09d65 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -58,7 +58,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
@@ -74,6 +74,7 @@
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
+#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"
@@ -395,6 +396,7 @@ void __init arch_call_rest_init(void)
{
unsigned long stack;
+ smp_reinit_ipl_cpu();
stack = stack_alloc();
if (!stack)
panic("Couldn't allocate kernel stack");
@@ -411,8 +413,9 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
unsigned long int_psw_mask = PSW_KERNEL_BITS;
+ struct lowcore *abs_lc, *lc;
unsigned long mcck_stack;
- struct lowcore *lc;
+ unsigned long flags;
if (IS_ENABLED(CONFIG_KASAN))
int_psw_mask |= PSW_MASK_DAT;
@@ -474,12 +477,14 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;
- put_abs_lowcore(restart_stack, lc->restart_stack);
- put_abs_lowcore(restart_fn, lc->restart_fn);
- put_abs_lowcore(restart_data, lc->restart_data);
- put_abs_lowcore(restart_source, lc->restart_source);
- put_abs_lowcore(restart_psw, lc->restart_psw);
- put_abs_lowcore(mcesad, lc->mcesad);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc, flags);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@@ -500,8 +505,8 @@ static void __init setup_lowcore_dat_off(void)
static void __init setup_lowcore_dat_on(void)
{
- struct lowcore *lc = lowcore_ptr[0];
- int cr;
+ struct lowcore *abs_lc;
+ unsigned long flags;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@@ -510,10 +515,15 @@ static void __init setup_lowcore_dat_on(void)
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
__ctl_set_bit(0, 28);
__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
- put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
- put_abs_lowcore(program_new_psw, lc->program_new_psw);
- for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
- put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+ if (abs_lowcore_map(0, lowcore_ptr[0], true))
+ panic("Couldn't setup absolute lowcore");
+ abs_lowcore_mapped = true;
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ abs_lc->program_new_psw = S390_lowcore.program_new_psw;
+ memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
+ sizeof(abs_lc->cregs_save_area));
+ put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
@@ -1019,10 +1029,10 @@ void __init setup_arch(char **cmdline_p)
reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
/*
- * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
* Therefore CPU and device initialization should be done afterwards.
*/
- smp_save_dump_cpus();
+ smp_save_dump_secondary_cpus();
#endif
setup_resources();
@@ -1041,12 +1051,15 @@ void __init setup_arch(char **cmdline_p)
* Create kernel page tables and switch to virtual addressing.
*/
paging_init();
-
+ memcpy_real_init();
/*
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
setup_lowcore_dat_on();
+#ifdef CONFIG_CRASH_DUMP
+ smp_save_dump_ipl_cpu();
+#endif
/* Setup default console */
conmode_default();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 30c91d565933..0031325ce4bc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -45,7 +45,7 @@
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
@@ -55,6 +55,7 @@
#include <asm/stacktrace.h>
#include <asm/topology.h>
#include <asm/vdso.h>
+#include <asm/maccess.h>
#include "entry.h"
enum {
@@ -212,10 +213,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
+ if (abs_lowcore_map(cpu, lc, true))
+ goto out_mcesa;
lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
+out_mcesa:
+ nmi_free_mcesa(&lc->mcesad);
out:
stack_free(mcck_stack);
stack_free(async_stack);
@@ -237,6 +242,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[cpu] = NULL;
+ abs_lowcore_unmap(cpu);
nmi_free_mcesa(&lc->mcesad);
stack_free(async_stack);
stack_free(mcck_stack);
@@ -315,9 +321,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
- struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned int source_cpu = stap();
+ struct lowcore *lc, *abs_lc;
+ unsigned int source_cpu;
+ unsigned long flags;
+ lc = lowcore_ptr[pcpu - pcpu_devices];
+ source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
@@ -332,10 +341,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
- put_abs_lowcore(restart_stack, stack);
- put_abs_lowcore(restart_fn, (unsigned long)func);
- put_abs_lowcore(restart_data, (unsigned long)data);
- put_abs_lowcore(restart_source, source_cpu);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = stack;
+ abs_lc->restart_fn = (unsigned long)func;
+ abs_lc->restart_data = (unsigned long)data;
+ abs_lc->restart_source = source_cpu;
+ put_abs_lowcore(abs_lc, flags);
}
__bpon();
asm volatile(
@@ -581,6 +592,8 @@ static DEFINE_SPINLOCK(ctl_lock);
void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
struct ec_creg_mask_parms parms = { .cr = cr, };
+ struct lowcore *abs_lc;
+ unsigned long flags;
u64 ctlreg;
if (set) {
@@ -591,9 +604,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
parms.andval = ~(1UL << bit);
}
spin_lock(&ctl_lock);
- get_abs_lowcore(ctlreg, cregs_save_area[cr]);
+ abs_lc = get_abs_lowcore(&flags);
+ ctlreg = abs_lc->cregs_save_area[cr];
ctlreg = (ctlreg & parms.andval) | parms.orval;
- put_abs_lowcore(cregs_save_area[cr], ctlreg);
+ abs_lc->cregs_save_area[cr] = ctlreg;
+ put_abs_lowcore(abs_lc, flags);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
@@ -650,35 +665,36 @@ int smp_store_status(int cpu)
* This case does not exist for s390 anymore, setup_arch explicitly
* deactivates the elfcorehdr= kernel parameter
*/
-static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, __vector128 *vxrs)
+static bool dump_available(void)
{
- if (is_boot_cpu)
- vxrs = boot_cpu_vector_save_area;
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
- save_area_add_vxrs(sa, vxrs);
+ return oldmem_data.start || is_ipl_type_dump();
}
-static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, void *regs)
+void __init smp_save_dump_ipl_cpu(void)
{
- if (is_boot_cpu)
- copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
+ struct save_area *sa;
+ void *regs;
+
+ if (!dump_available())
+ return;
+ sa = save_area_alloc(true);
+ regs = memblock_alloc(512, 8);
+ if (!sa || !regs)
+ panic("could not allocate memory for boot CPU save area\n");
+ copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
+ memblock_free(regs, 512);
+ if (MACHINE_HAS_VX)
+ save_area_add_vxrs(sa, boot_cpu_vector_save_area);
}
-void __init smp_save_dump_cpus(void)
+void __init smp_save_dump_secondary_cpus(void)
{
int addr, boot_cpu_addr, max_cpu_addr;
struct save_area *sa;
- bool is_boot_cpu;
void *page;
- if (!(oldmem_data.start || is_ipl_type_dump()))
- /* No previous system present, normal boot. */
+ if (!dump_available())
return;
/* Allocate a page as dumping area for the store status sigps */
page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
@@ -691,26 +707,20 @@ void __init smp_save_dump_cpus(void)
boot_cpu_addr = stap();
max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (addr == boot_cpu_addr)
+ continue;
if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- is_boot_cpu = (addr == boot_cpu_addr);
- /* Allocate save area */
- sa = save_area_alloc(is_boot_cpu);
+ sa = save_area_alloc(false);
if (!sa)
panic("could not allocate memory for save area\n");
- if (MACHINE_HAS_VX)
- /* Get the vector registers */
- smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
- /*
- * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
- * of the boot CPU are stored in the HSA. To retrieve
- * these registers an SCLP request is required which is
- * done by drivers/s390/char/zcore.c:init_cpu_info()
- */
- if (!is_boot_cpu || oldmem_data.start)
- /* Get the CPU registers */
- smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+ save_area_add_regs(sa, page);
+ if (MACHINE_HAS_VX) {
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+ save_area_add_vxrs(sa, page);
+ }
}
memblock_free(page, PAGE_SIZE);
diag_amode31_ops.diag308_reset();
@@ -1256,7 +1266,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc)
: "memory", "cc");
}
-static int __init smp_reinit_ipl_cpu(void)
+int __init smp_reinit_ipl_cpu(void)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc, *lc_ipl;
@@ -1281,6 +1291,8 @@ static int __init smp_reinit_ipl_cpu(void)
__ctl_clear_bit(0, 28); /* disable lowcore protection */
S390_lowcore.mcesad = mcesad;
__ctl_load(cr0, 0, 0);
+ if (abs_lowcore_map(0, lc, false))
+ panic("Couldn't remap absolute lowcore");
lowcore_ptr[0] = lc;
local_mcck_enable();
local_irq_restore(flags);
@@ -1291,4 +1303,3 @@ static int __init smp_reinit_ipl_cpu(void)
return 0;
}
-early_initcall(smp_reinit_ipl_cpu);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 5075cde77b29..3105ca5bd470 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -69,10 +69,11 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
mmap_read_lock(mm);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
unsigned long size = vma->vm_end - vma->vm_start;
if (!vma_is_special_mapping(vma, &vvar_mapping))
@@ -226,7 +227,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
end -= len;
if (end > start) {
- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+ offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1);
addr = start + (offset << PAGE_SHIFT);
} else {
addr = start;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 082ec5f2c3a5..0243b6e38d36 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -489,6 +489,8 @@ enum prot_type {
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
+ /* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+ PROT_NONE,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -504,6 +506,10 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_NONE:
+ /* We should never get here, acts like termination */
+ WARN_ON_ONCE(1);
+ break;
case PROT_TYPE_IEP:
tec->b61 = 1;
fallthrough;
@@ -968,8 +974,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
return rc;
} else {
gpa = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, gpa))
+ if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
+ prot = PROT_NONE;
+ }
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
@@ -1112,8 +1120,6 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION && try_storage_prot_override)
rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
data, fragment_len, PAGE_SPO_ACC);
- if (rc == PGM_PROTECTION)
- prot = PROT_TYPE_KEYC;
if (rc)
break;
len -= fragment_len;
@@ -1123,6 +1129,10 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc > 0) {
bool terminate = (mode == GACC_STORE) && (idx > 0);
+ if (rc == PGM_PROTECTION)
+ prot = PROT_TYPE_KEYC;
+ else
+ prot = PROT_NONE;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b9c944b262c7..ab569faf0df2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3324,7 +3324,7 @@ static void aen_host_forward(unsigned long si)
if (gaite->count == 0)
return;
if (gaite->aisb != 0)
- set_bit_inv(gaite->aisbo, (unsigned long *)gaite->aisb);
+ set_bit_inv(gaite->aisbo, phys_to_virt(gaite->aisb));
kvm = kvm_s390_pci_si_to_kvm(aift, si);
if (!kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index edfd4bbd0cba..bc491a73815c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -505,7 +505,7 @@ int kvm_arch_init(void *opaque)
goto out;
}
- if (kvm_s390_pci_interp_allowed()) {
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
rc = kvm_s390_pci_init();
if (rc) {
pr_err("Unable to allocate AIFT for PCI\n");
@@ -527,7 +527,7 @@ out:
void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
- if (kvm_s390_pci_interp_allowed())
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
kvm_s390_pci_exit();
debug_unregister(kvm_s390_dbf);
debug_unregister(kvm_s390_dbf_uv);
@@ -1207,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
return 0;
}
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_tod_clock gtod;
@@ -1216,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
return -EINVAL;
- kvm_s390_set_tod_clock(kvm, &gtod);
+ __kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
gtod.epoch_idx, gtod.tod);
@@ -1247,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
sizeof(gtod.tod)))
return -EFAULT;
- kvm_s390_set_tod_clock(kvm, &gtod);
+ __kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
return 0;
}
@@ -1259,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
if (attr->flags)
return -EINVAL;
+ mutex_lock(&kvm->lock);
+ /*
+ * For protected guests, the TOD is managed by the ultravisor, so trying
+ * to change it will never bring the expected results.
+ */
+ if (kvm_s390_pv_is_protected(kvm)) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
switch (attr->attr) {
case KVM_S390_VM_TOD_EXT:
ret = kvm_s390_set_tod_ext(kvm, attr);
@@ -1273,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
ret = -ENXIO;
break;
}
+
+out_unlock:
+ mutex_unlock(&kvm->lock);
return ret;
}
@@ -4343,8 +4358,6 @@ retry:
goto retry;
}
- /* nothing to do, just clear the request */
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
@@ -4379,13 +4392,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t
preempt_enable();
}
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
-{
- mutex_lock(&kvm->lock);
- __kvm_s390_set_tod_clock(kvm, gtod);
- mutex_unlock(&kvm->lock);
-}
-
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
if (!mutex_trylock(&kvm->lock))
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f6fd668f887e..4755492dfabc 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -363,7 +363,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index bb8c335d17b9..ded1af2ddae9 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -58,7 +58,7 @@ static int zpci_setup_aipb(u8 nisc)
if (!zpci_aipb)
return -ENOMEM;
- aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, 0);
+ aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
if (!aift->sbv) {
rc = -ENOMEM;
goto free_aipb;
@@ -71,7 +71,7 @@ static int zpci_setup_aipb(u8 nisc)
rc = -ENOMEM;
goto free_sbv;
}
- aift->gait = (struct zpci_gaite *)page_to_phys(page);
+ aift->gait = (struct zpci_gaite *)page_to_virt(page);
zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
@@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc)
return -EPERM;
mutex_lock(&aift->aift_lock);
- aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
+ aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
GFP_KERNEL);
if (!aift->kzdev) {
rc = -ENOMEM;
@@ -373,7 +373,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
gaite->gisc = 0;
gaite->aisbo = 0;
gaite->gisa = 0;
- aift->kzdev[zdev->aisb] = 0;
+ aift->kzdev[zdev->aisb] = NULL;
/* Clear zdev info */
airq_iv_free_bit(aift->sbv, zdev->aisb);
airq_iv_release(zdev->aibv);
@@ -672,23 +672,31 @@ out:
int kvm_s390_pci_init(void)
{
+ zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+ zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
+
+ if (!kvm_s390_pci_interp_allowed())
+ return 0;
+
aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
if (!aift)
return -ENOMEM;
spin_lock_init(&aift->gait_lock);
mutex_init(&aift->aift_lock);
- zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
- zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
return 0;
}
void kvm_s390_pci_exit(void)
{
- mutex_destroy(&aift->aift_lock);
zpci_kvm_hook.kvm_register = NULL;
zpci_kvm_hook.kvm_unregister = NULL;
+ if (!kvm_s390_pci_interp_allowed())
+ return;
+
+ mutex_destroy(&aift->aift_lock);
+
kfree(aift);
}
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
index 3a3606c3a0fe..486d06ef563f 100644
--- a/arch/s390/kvm/pci.h
+++ b/arch/s390/kvm/pci.h
@@ -46,9 +46,9 @@ extern struct zpci_aift *aift;
static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
unsigned long si)
{
- if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || aift->kzdev == 0 ||
- aift->kzdev[si] == 0)
- return 0;
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || !aift->kzdev ||
+ !aift->kzdev[si])
+ return NULL;
return aift->kzdev[si]->kvm;
};
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index f7f5adea8940..be14c58cb989 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -13,13 +13,10 @@
void __delay(unsigned long loops)
{
- /*
- * To end the bloody studid and useless discussion about the
- * BogoMips number I took the liberty to define the __delay
- * function in a way that that resulting BogoMips number will
- * yield the megahertz number of the cpu. The important function
- * is udelay and that is done using the tod clock. -- martin.
- */
+ /*
+ * Loop 'loops' times. Callers must not assume a specific
+ * amount of time passes before this function returns.
+ */
asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
}
EXPORT_SYMBOL(__delay);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d7b3b193d108..720036fb1924 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -81,8 +81,9 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
might_fault();
if (!should_fail_usercopy()) {
- instrument_copy_from_user(to, from, n);
+ instrument_copy_from_user_before(to, from, n);
res = raw_copy_from_user_key(to, from, n, key);
+ instrument_copy_from_user_after(to, from, n, res);
}
if (unlikely(res))
memset(to + (n - res), 0, res);
@@ -156,7 +157,7 @@ unsigned long __clear_user(void __user *to, unsigned long size)
asm volatile(
" lr 0,%[spec]\n"
"0: mvcos 0(%1),0(%4),%0\n"
- " jz 4f\n"
+ "6: jz 4f\n"
"1: algr %0,%2\n"
" slgr %1,%2\n"
" j 0b\n"
@@ -166,11 +167,11 @@ unsigned long __clear_user(void __user *to, unsigned long size)
" clgr %0,%3\n" /* copy crosses next page boundary? */
" jnh 5f\n"
"3: mvcos 0(%1),0(%4),%3\n"
- " slgr %0,%3\n"
+ "7: slgr %0,%3\n"
" j 5f\n"
"4: slgr %0,%0\n"
"5:\n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
: "a" (empty_zero_page), [spec] "d" (spec.val)
: "cc", "memory", "0");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9f9af5298dd6..9953819d7959 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -8,8 +8,10 @@
#include <linux/kasan.h>
#include <asm/ptdump.h>
#include <asm/kasan.h>
+#include <asm/abs_lowcore.h>
#include <asm/nospec-branch.h>
#include <asm/sections.h>
+#include <asm/maccess.h>
static unsigned long max_addr;
@@ -21,6 +23,8 @@ struct addr_marker {
enum address_markers_idx {
IDENTITY_BEFORE_NR = 0,
IDENTITY_BEFORE_END_NR,
+ AMODE31_START_NR,
+ AMODE31_END_NR,
KERNEL_START_NR,
KERNEL_END_NR,
#ifdef CONFIG_KFENCE
@@ -39,11 +43,17 @@ enum address_markers_idx {
VMALLOC_END_NR,
MODULES_NR,
MODULES_END_NR,
+ ABS_LOWCORE_NR,
+ ABS_LOWCORE_END_NR,
+ MEMCPY_REAL_NR,
+ MEMCPY_REAL_END_NR,
};
static struct addr_marker address_markers[] = {
[IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
+ [AMODE31_START_NR] = {0, "Amode31 Area Start"},
+ [AMODE31_END_NR] = {0, "Amode31 Area End"},
[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
#ifdef CONFIG_KFENCE
@@ -62,6 +72,10 @@ static struct addr_marker address_markers[] = {
[VMALLOC_END_NR] = {0, "vmalloc Area End"},
[MODULES_NR] = {0, "Modules Area Start"},
[MODULES_END_NR] = {0, "Modules Area End"},
+ [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
+ [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
+ [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
+ [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
{ -1, NULL }
};
@@ -276,8 +290,14 @@ static int pt_dump_init(void)
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
+ address_markers[AMODE31_START_NR].start_address = __samode31;
+ address_markers[AMODE31_END_NR].start_address = __eamode31;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
address_markers[MODULES_END_NR].start_address = MODULES_END;
+ address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
+ address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
+ address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
+ address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9ab6ca6f7f59..9649d9382e0a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -268,8 +268,7 @@ static noinline void do_sigbus(struct pt_regs *regs)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline void do_fault_error(struct pt_regs *regs, int access,
- vm_fault_t fault)
+static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
{
int si_code;
@@ -516,7 +515,7 @@ void do_protection_exception(struct pt_regs *regs)
fault = do_exception(regs, access);
}
if (unlikely(fault))
- do_fault_error(regs, access, fault);
+ do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_protection_exception);
@@ -528,7 +527,7 @@ void do_dat_exception(struct pt_regs *regs)
access = VM_ACCESS_FLAGS;
fault = do_exception(regs, access);
if (unlikely(fault))
- do_fault_error(regs, access, fault);
+ do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_dat_exception);
@@ -803,7 +802,7 @@ void do_secure_storage_access(struct pt_regs *regs)
addr = __gmap_translate(gmap, addr);
mmap_read_unlock(mm);
if (IS_ERR_VALUE(addr)) {
- do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
break;
}
fallthrough;
@@ -813,7 +812,7 @@ void do_secure_storage_access(struct pt_regs *regs)
vma = find_vma(mm, addr);
if (!vma) {
mmap_read_unlock(mm);
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
break;
}
page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
@@ -836,7 +835,7 @@ void do_secure_storage_access(struct pt_regs *regs)
BUG();
break;
default:
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
WARN_ON_ONCE(1);
}
}
@@ -848,7 +847,7 @@ void do_non_secure_storage_access(struct pt_regs *regs)
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
if (get_fault_type(regs) != GMAP_FAULT) {
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
WARN_ON_ONCE(1);
return;
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 62758cb5872f..02d15c8dc92e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2515,8 +2515,9 @@ static const struct mm_walk_ops thp_split_walk_ops = {
static inline void thp_split_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
walk_page_vma(vma, &thp_split_walk_ops, NULL);
@@ -2584,8 +2585,9 @@ int gmap_mark_unmergeable(void)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int ret;
+ VMA_ITERATOR(vmi, mm, 0);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vma->vm_flags);
if (ret)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 10e51ef9c79a..c299a18273ff 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -237,16 +237,6 @@ int pud_huge(pud_t pud)
return pud_large(pud);
}
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int flags)
-{
- if (flags & FOLL_GET)
- return NULL;
-
- return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 4a154a084966..97d66a3e60fb 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -37,7 +37,7 @@
#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index d6d84e02f35a..1571cdcb0c50 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -12,10 +12,17 @@
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/cpu.h>
+#include <linux/uio.h>
#include <asm/asm-extable.h>
#include <asm/ctl_reg.h>
#include <asm/io.h>
+#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
+#include <asm/maccess.h>
+
+unsigned long __bootdata_preserved(__memcpy_real_area);
+static __ro_after_init pte_t *memcpy_real_ptep;
+static DEFINE_MUTEX(memcpy_real_mutex);
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
{
@@ -76,118 +83,72 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
return dst;
}
-static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
+void __init memcpy_real_init(void)
{
- union register_pair _dst, _src;
- int rc = -EFAULT;
-
- _dst.even = (unsigned long) dest;
- _dst.odd = (unsigned long) count;
- _src.even = (unsigned long) src;
- _src.odd = (unsigned long) count;
- asm volatile (
- "0: mvcle %[dst],%[src],0\n"
- "1: jo 0b\n"
- " lhi %[rc],0\n"
- "2:\n"
- EX_TABLE(1b,2b)
- : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair)
- : : "cc", "memory");
- return rc;
-}
-
-static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
- unsigned long src,
- unsigned long count)
-{
- int irqs_disabled, rc;
- unsigned long flags;
-
- if (!count)
- return 0;
- flags = arch_local_irq_save();
- irqs_disabled = arch_irqs_disabled_flags(flags);
- if (!irqs_disabled)
- trace_hardirqs_off();
- __arch_local_irq_stnsm(0xf8); // disable DAT
- rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
- if (flags & PSW_MASK_DAT)
- __arch_local_irq_stosm(0x04); // enable DAT
- if (!irqs_disabled)
- trace_hardirqs_on();
- __arch_local_irq_ssm(flags);
- return rc;
+ memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
+ if (!memcpy_real_ptep)
+ panic("Couldn't setup memcpy real area");
}
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, unsigned long src, size_t count)
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long _dest = (unsigned long)dest;
- unsigned long _src = (unsigned long)src;
- unsigned long _count = (unsigned long)count;
- int rc;
-
- if (S390_lowcore.nodat_stack != 0) {
- preempt_disable();
- rc = call_on_stack(3, S390_lowcore.nodat_stack,
- unsigned long, _memcpy_real,
- unsigned long, _dest,
- unsigned long, _src,
- unsigned long, _count);
- preempt_enable();
- return rc;
+ size_t len, copied, res = 0;
+ unsigned long phys, offset;
+ void *chunk;
+ pte_t pte;
+
+ while (count) {
+ phys = src & PAGE_MASK;
+ offset = src & ~PAGE_MASK;
+ chunk = (void *)(__memcpy_real_area + offset);
+ len = min(count, PAGE_SIZE - offset);
+ pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
+
+ mutex_lock(&memcpy_real_mutex);
+ if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
+ __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
+ set_pte(memcpy_real_ptep, pte);
+ }
+ copied = copy_to_iter(chunk, len, iter);
+ mutex_unlock(&memcpy_real_mutex);
+
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- /*
- * This is a really early memcpy_real call, the stacks are
- * not set up yet. Just call _memcpy_real on the early boot
- * stack
- */
- return _memcpy_real(_dest, _src, _count);
+ return res;
}
-/*
- * Copy memory in absolute mode (kernel to kernel)
- */
-void memcpy_absolute(void *dest, void *src, size_t count)
+int memcpy_real(void *dest, unsigned long src, size_t count)
{
- unsigned long cr0, flags, prefix;
-
- flags = arch_local_irq_save();
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28); /* disable lowcore protection */
- prefix = store_prefix();
- if (prefix) {
- local_mcck_disable();
- set_prefix(0);
- memcpy(dest, src, count);
- set_prefix(prefix);
- local_mcck_enable();
- } else {
- memcpy(dest, src, count);
- }
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
+ struct iov_iter iter;
+ struct kvec kvec;
+
+ kvec.iov_base = dest;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (memcpy_real_iter(&iter, src, count) < count)
+ return -EFAULT;
+ return 0;
}
/*
- * Check if physical address is within prefix or zero page
+ * Find CPU that owns swapped prefix page
*/
-static int is_swapped(phys_addr_t addr)
+static int get_swapped_owner(phys_addr_t addr)
{
phys_addr_t lc;
int cpu;
- if (addr < sizeof(struct lowcore))
- return 1;
for_each_online_cpu(cpu) {
lc = virt_to_phys(lowcore_ptr[cpu]);
if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
continue;
- return 1;
+ return cpu;
}
- return 0;
+ return -1;
}
/*
@@ -200,17 +161,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
{
void *ptr = phys_to_virt(addr);
void *bounce = ptr;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned long size;
+ int this_cpu, cpu;
cpus_read_lock();
- preempt_disable();
- if (is_swapped(addr)) {
- size = PAGE_SIZE - (addr & ~PAGE_MASK);
- bounce = (void *) __get_free_page(GFP_ATOMIC);
- if (bounce)
- memcpy_absolute(bounce, ptr, size);
+ this_cpu = get_cpu();
+ if (addr >= sizeof(struct lowcore)) {
+ cpu = get_swapped_owner(addr);
+ if (cpu < 0)
+ goto out;
+ }
+ bounce = (void *)__get_free_page(GFP_ATOMIC);
+ if (!bounce)
+ goto out;
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ if (addr < sizeof(struct lowcore)) {
+ abs_lc = get_abs_lowcore(&flags);
+ ptr = (void *)abs_lc + addr;
+ memcpy(bounce, ptr, size);
+ put_abs_lowcore(abs_lc, flags);
+ } else if (cpu == this_cpu) {
+ ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
+ memcpy(bounce, ptr, size);
+ } else {
+ memcpy(bounce, ptr, size);
}
- preempt_enable();
+out:
+ put_cpu();
cpus_read_unlock();
return bounce;
}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 5980ce348832..3327c47bc181 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,7 +37,7 @@ static inline int mmap_is_legacy(struct rlimit *rlim_stack)
unsigned long arch_mmap_rnd(void)
{
- return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c2583f921ca8..ee1a97078527 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -240,7 +240,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
} else if (pmd_none(*pmd)) {
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE) &&
- MACHINE_HAS_EDAT1 && addr && direct &&
+ MACHINE_HAS_EDAT1 && direct &&
!debug_pagealloc_enabled()) {
set_pmd(pmd, __pmd(__pa(addr) | prot));
pages++;
@@ -336,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
} else if (pud_none(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE) &&
- MACHINE_HAS_EDAT2 && addr && direct &&
+ MACHINE_HAS_EDAT2 && direct &&
!debug_pagealloc_enabled()) {
set_pud(pud, __pud(__pa(addr) | prot));
pages++;
@@ -561,6 +561,103 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
}
/*
+ * Allocate new or return existing page-table entry, but do not map it
+ * to any physical address. If missing, allocate segment- and region-
+ * table entries along the way. Meeting a large segment- or region-table entry
+ * while traversing is an error, since the function is expected to be
+ * called against virtual regions reserved for 4KB mappings only.
+ */
+pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+{
+ pte_t *ptep = NULL;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd)) {
+ if (!alloc)
+ goto out;
+ p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4d)
+ goto out;
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ if (!alloc)
+ goto out;
+ pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pud)
+ goto out;
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ if (!alloc)
+ goto out;
+ pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pmd)
+ goto out;
+ pud_populate(&init_mm, pud, pmd);
+ } else if (WARN_ON_ONCE(pud_large(*pud))) {
+ goto out;
+ }
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd)) {
+ if (!alloc)
+ goto out;
+ pte = vmem_pte_alloc();
+ if (!pte)
+ goto out;
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+ goto out;
+ }
+ ptep = pte_offset_kernel(pmd, addr);
+out:
+ return ptep;
+}
+
+int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
+{
+ pte_t *ptep, pte;
+
+ if (!IS_ALIGNED(addr, PAGE_SIZE))
+ return -EINVAL;
+ ptep = vmem_get_alloc_pte(addr, alloc);
+ if (!ptep)
+ return -ENOMEM;
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ pte = mk_pte_phys(phys, prot);
+ set_pte(ptep, pte);
+ return 0;
+}
+
+int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+ int rc;
+
+ mutex_lock(&vmem_mutex);
+ rc = __vmem_map_4k_page(addr, phys, prot, true);
+ mutex_unlock(&vmem_mutex);
+ return rc;
+}
+
+void vmem_unmap_4k_page(unsigned long addr)
+{
+ pte_t *ptep;
+
+ mutex_lock(&vmem_mutex);
+ ptep = virt_to_kpte(addr);
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ pte_clear(&init_mm, addr, ptep);
+ mutex_unlock(&vmem_mutex);
+}
+
+/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
* additional memory segments.
@@ -584,6 +681,9 @@ void __init vmem_map_init(void)
__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+ /* lowcore requires 4k mapping for real addresses / prefixing */
+ set_memory_4k(0, LC_PAGES);
+
/* lowcore must be executable for LPSWE */
if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f46833a25526..227cf0a62800 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -666,7 +666,7 @@ static int __init dma_alloc_cpu_table_caches(void)
int __init zpci_dma_init(void)
{
- s390_iommu_aperture = (u64)high_memory;
+ s390_iommu_aperture = (u64)virt_to_phys(high_memory);
if (!s390_iommu_aperture_factor)
s390_iommu_aperture = ULONG_MAX;
else
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 080c88620723..588089332931 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -64,7 +64,7 @@ static inline int __pcistg_mio_inuser(
asm volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
- " sllg %[val],%[val],8\n"
+ "4: sllg %[val],%[val],8\n"
" aghi %[src],1\n"
" ogr %[val],%[tmp]\n"
" brctg %[cnt],0b\n"
@@ -72,7 +72,7 @@ static inline int __pcistg_mio_inuser(
"2: ipm %[cc]\n"
" srl %[cc],28\n"
"3: sacf 768\n"
- EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
:
[src] "+a" (src), [cnt] "+d" (cnt),
[val] "+d" (val), [tmp] "=d" (tmp),
@@ -215,10 +215,10 @@ static inline int __pcilg_mio_inuser(
"2: ahi %[shift],-8\n"
" srlg %[tmp],%[val],0(%[shift])\n"
"3: stc %[tmp],0(%[dst])\n"
- " aghi %[dst],1\n"
+ "5: aghi %[dst],1\n"
" brctg %[cnt],2b\n"
"4: sacf 768\n"
- EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
+ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
:
[ioaddr_len] "+&d" (ioaddr_len.pair),
[cc] "+d" (cc), [val] "=d" (val),