aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig172
-rw-r--r--arch/s390/Kconfig.debug12
-rw-r--r--arch/s390/Makefile48
-rw-r--r--arch/s390/boot/.gitignore3
-rw-r--r--arch/s390/boot/Makefile79
-rw-r--r--arch/s390/boot/boot.h6
-rw-r--r--arch/s390/boot/clz_ctz.c (renamed from arch/s390/boot/compressed/clz_ctz.c)0
-rw-r--r--arch/s390/boot/compressed/.gitignore4
-rw-r--r--arch/s390/boot/compressed/Makefile88
-rw-r--r--arch/s390/boot/decompressor.c (renamed from arch/s390/boot/compressed/decompressor.c)0
-rw-r--r--arch/s390/boot/decompressor.h (renamed from arch/s390/boot/compressed/decompressor.h)0
-rw-r--r--arch/s390/boot/head.S367
-rwxr-xr-x[-rw-r--r--]arch/s390/boot/install.sh6
-rw-r--r--arch/s390/boot/ipl_data.c84
-rw-r--r--arch/s390/boot/ipl_parm.c7
-rw-r--r--arch/s390/boot/kaslr.c2
-rw-r--r--arch/s390/boot/mem_detect.c2
-rw-r--r--arch/s390/boot/startup.c21
-rw-r--r--arch/s390/boot/uv.c11
-rw-r--r--arch/s390/boot/uv.h7
-rw-r--r--arch/s390/boot/version.c1
-rw-r--r--arch/s390/boot/vmlinux.lds.S (renamed from arch/s390/boot/compressed/vmlinux.lds.S)22
-rw-r--r--arch/s390/configs/btf.config1
-rw-r--r--arch/s390/configs/debug_defconfig110
-rw-r--r--arch/s390/configs/defconfig102
-rw-r--r--arch/s390/configs/kasan.config3
-rw-r--r--arch/s390/configs/zfcpdump_defconfig15
-rw-r--r--arch/s390/crypto/Kconfig135
-rw-r--r--arch/s390/crypto/Makefile4
-rw-r--r--arch/s390/crypto/aes_s390.c6
-rw-r--r--arch/s390/crypto/arch_random.c217
-rw-r--r--arch/s390/crypto/chacha-glue.c130
-rw-r--r--arch/s390/crypto/chacha-s390.S907
-rw-r--r--arch/s390/crypto/chacha-s390.h14
-rw-r--r--arch/s390/crypto/crc32-vx.c2
-rw-r--r--arch/s390/crypto/des_s390.c4
-rw-r--r--arch/s390/crypto/ghash_s390.c2
-rw-r--r--arch/s390/crypto/prng.c4
-rw-r--r--arch/s390/crypto/sha1_s390.c2
-rw-r--r--arch/s390/crypto/sha256_s390.c2
-rw-r--r--arch/s390/crypto/sha3_256_s390.c2
-rw-r--r--arch/s390/crypto/sha3_512_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c34
-rw-r--r--arch/s390/hypfs/hypfs_diag.c2
-rw-r--r--arch/s390/hypfs/hypfs_vm.c9
-rw-r--r--arch/s390/hypfs/inode.c2
-rw-r--r--arch/s390/include/asm/abs_lowcore.h17
-rw-r--r--arch/s390/include/asm/airq.h7
-rw-r--r--arch/s390/include/asm/alternative-asm.h70
-rw-r--r--arch/s390/include/asm/alternative.h86
-rw-r--r--arch/s390/include/asm/ap.h69
-rw-r--r--arch/s390/include/asm/archrandom.h40
-rw-r--r--arch/s390/include/asm/asm-extable.h88
-rw-r--r--arch/s390/include/asm/barrier.h16
-rw-r--r--arch/s390/include/asm/bitops.h76
-rw-r--r--arch/s390/include/asm/bug.h5
-rw-r--r--arch/s390/include/asm/ccwdev.h1
-rw-r--r--arch/s390/include/asm/ccwgroup.h2
-rw-r--r--arch/s390/include/asm/chsc.h2
-rw-r--r--arch/s390/include/asm/cio.h2
-rw-r--r--arch/s390/include/asm/compat.h124
-rw-r--r--arch/s390/include/asm/cpu_mf.h17
-rw-r--r--arch/s390/include/asm/cpufeature.h23
-rw-r--r--arch/s390/include/asm/crw.h1
-rw-r--r--arch/s390/include/asm/ctl_reg.h23
-rw-r--r--arch/s390/include/asm/diag.h5
-rw-r--r--arch/s390/include/asm/dma.h6
-rw-r--r--arch/s390/include/asm/eadm.h2
-rw-r--r--arch/s390/include/asm/entry-common.h17
-rw-r--r--arch/s390/include/asm/extable.h46
-rw-r--r--arch/s390/include/asm/fcx.h4
-rw-r--r--arch/s390/include/asm/fpu/api.h1
-rw-r--r--arch/s390/include/asm/ftrace.h10
-rw-r--r--arch/s390/include/asm/futex.h4
-rw-r--r--arch/s390/include/asm/gmap.h39
-rw-r--r--arch/s390/include/asm/hugetlb.h36
-rw-r--r--arch/s390/include/asm/idals.h2
-rw-r--r--arch/s390/include/asm/ipl.h6
-rw-r--r--arch/s390/include/asm/irq.h9
-rw-r--r--arch/s390/include/asm/jump_label.h5
-rw-r--r--arch/s390/include/asm/kexec.h26
-rw-r--r--arch/s390/include/asm/kprobes.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h41
-rw-r--r--arch/s390/include/asm/linkage.h18
-rw-r--r--arch/s390/include/asm/livepatch.h22
-rw-r--r--arch/s390/include/asm/lowcore.h33
-rw-r--r--arch/s390/include/asm/maccess.h17
-rw-r--r--arch/s390/include/asm/mmu.h17
-rw-r--r--arch/s390/include/asm/mmu_context.h2
-rw-r--r--arch/s390/include/asm/nmi.h9
-rw-r--r--arch/s390/include/asm/nospec-insn.h155
-rw-r--r--arch/s390/include/asm/os_info.h3
-rw-r--r--arch/s390/include/asm/page.h32
-rw-r--r--arch/s390/include/asm/pai.h78
-rw-r--r--arch/s390/include/asm/pci.h16
-rw-r--r--arch/s390/include/asm/pci_clp.h9
-rw-r--r--arch/s390/include/asm/pci_debug.h7
-rw-r--r--arch/s390/include/asm/pci_dma.h29
-rw-r--r--arch/s390/include/asm/pci_insn.h29
-rw-r--r--arch/s390/include/asm/pgalloc.h8
-rw-r--r--arch/s390/include/asm/pgtable.h311
-rw-r--r--arch/s390/include/asm/preempt.h15
-rw-r--r--arch/s390/include/asm/processor.h41
-rw-r--r--arch/s390/include/asm/ptrace.h31
-rw-r--r--arch/s390/include/asm/qdio.h31
-rw-r--r--arch/s390/include/asm/sclp.h12
-rw-r--r--arch/s390/include/asm/scsw.h88
-rw-r--r--arch/s390/include/asm/smp.h5
-rw-r--r--arch/s390/include/asm/softirq_stack.h3
-rw-r--r--arch/s390/include/asm/spinlock.h3
-rw-r--r--arch/s390/include/asm/stacktrace.h23
-rw-r--r--arch/s390/include/asm/stp.h4
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h2
-rw-r--r--arch/s390/include/asm/sysinfo.h6
-rw-r--r--arch/s390/include/asm/termios.h26
-rw-r--r--arch/s390/include/asm/timex.h7
-rw-r--r--arch/s390/include/asm/tlb.h3
-rw-r--r--arch/s390/include/asm/tlbflush.h4
-rw-r--r--arch/s390/include/asm/tpi.h13
-rw-r--r--arch/s390/include/asm/uaccess.h369
-rw-r--r--arch/s390/include/asm/unistd.h1
-rw-r--r--arch/s390/include/asm/unwind.h13
-rw-r--r--arch/s390/include/asm/user.h4
-rw-r--r--arch/s390/include/asm/uv.h107
-rw-r--r--arch/s390/include/asm/vx-insn.h121
-rw-r--r--arch/s390/include/uapi/asm/dasd.h14
-rw-r--r--arch/s390/include/uapi/asm/hwctrset.h6
-rw-r--r--arch/s390/include/uapi/asm/kvm.h1
-rw-r--r--arch/s390/include/uapi/asm/pkey.h2
-rw-r--r--arch/s390/include/uapi/asm/signal.h2
-rw-r--r--arch/s390/include/uapi/asm/termios.h50
-rw-r--r--arch/s390/include/uapi/asm/uvdevice.h51
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h44
-rw-r--r--arch/s390/kernel/Makefile14
-rw-r--r--arch/s390/kernel/abs_lowcore.c95
-rw-r--r--arch/s390/kernel/alternative.c61
-rw-r--r--arch/s390/kernel/asm-offsets.c34
-rw-r--r--arch/s390/kernel/base.S41
-rw-r--r--arch/s390/kernel/cache.c7
-rw-r--r--arch/s390/kernel/compat_linux.h89
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/cpufeature.c46
-rw-r--r--arch/s390/kernel/crash_dump.c133
-rw-r--r--arch/s390/kernel/debug.c2
-rw-r--r--arch/s390/kernel/diag.c1
-rw-r--r--arch/s390/kernel/dis.c1
-rw-r--r--arch/s390/kernel/dumpstack.c2
-rw-r--r--arch/s390/kernel/early.c29
-rw-r--r--arch/s390/kernel/earlypgm.S23
-rw-r--r--arch/s390/kernel/entry.S83
-rw-r--r--arch/s390/kernel/entry.h3
-rw-r--r--arch/s390/kernel/ftrace.c107
-rw-r--r--arch/s390/kernel/ftrace.h2
-rw-r--r--arch/s390/kernel/head64.S1
-rw-r--r--arch/s390/kernel/ipl.c11
-rw-r--r--arch/s390/kernel/irq.c15
-rw-r--r--arch/s390/kernel/jump_label.c28
-rw-r--r--arch/s390/kernel/kprobes.c49
-rw-r--r--arch/s390/kernel/lgr.c3
-rw-r--r--arch/s390/kernel/machine_kexec.c23
-rw-r--r--arch/s390/kernel/machine_kexec_file.c56
-rw-r--r--arch/s390/kernel/mcount.S69
-rw-r--r--arch/s390/kernel/module.c55
-rw-r--r--arch/s390/kernel/nmi.c89
-rw-r--r--arch/s390/kernel/nospec-branch.c31
-rw-r--r--arch/s390/kernel/numa.c7
-rw-r--r--arch/s390/kernel/os_info.c20
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c34
-rw-r--r--arch/s390/kernel/perf_cpum_cf_common.c4
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c154
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2
-rw-r--r--arch/s390/kernel/perf_event.c2
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c699
-rw-r--r--arch/s390/kernel/perf_pai_ext.c672
-rw-r--r--arch/s390/kernel/process.c39
-rw-r--r--arch/s390/kernel/processor.c36
-rw-r--r--arch/s390/kernel/ptrace.c165
-rw-r--r--arch/s390/kernel/relocate_kernel.S6
-rw-r--r--arch/s390/kernel/setup.c60
-rw-r--r--arch/s390/kernel/signal.c7
-rw-r--r--arch/s390/kernel/smp.c177
-rw-r--r--arch/s390/kernel/stacktrace.c1
-rw-r--r--arch/s390/kernel/syscalls/Makefile3
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/sysinfo.c1
-rw-r--r--arch/s390/kernel/text_amode31.S1
-rw-r--r--arch/s390/kernel/time.c12
-rw-r--r--arch/s390/kernel/topology.c1
-rw-r--r--arch/s390/kernel/traps.c22
-rw-r--r--arch/s390/kernel/unwind_bc.c12
-rw-r--r--arch/s390/kernel/uprobes.c16
-rw-r--r--arch/s390/kernel/uv.c166
-rw-r--r--arch/s390/kernel/vdso.c58
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/kernel/vtime.c13
-rw-r--r--arch/s390/kvm/Kconfig2
-rw-r--r--arch/s390/kvm/Makefile9
-rw-r--r--arch/s390/kvm/gaccess.c520
-rw-r--r--arch/s390/kvm/gaccess.h90
-rw-r--r--arch/s390/kvm/intercept.c27
-rw-r--r--arch/s390/kvm/interrupt.c168
-rw-r--r--arch/s390/kvm/kvm-s390.c859
-rw-r--r--arch/s390/kvm/kvm-s390.h51
-rw-r--r--arch/s390/kvm/pci.c702
-rw-r--r--arch/s390/kvm/pci.h87
-rw-r--r--arch/s390/kvm/priv.c108
-rw-r--r--arch/s390/kvm/pv.c280
-rw-r--r--arch/s390/kvm/sigp.c32
-rw-r--r--arch/s390/kvm/vsie.c12
-rw-r--r--arch/s390/lib/Makefile5
-rw-r--r--arch/s390/lib/delay.c12
-rw-r--r--arch/s390/lib/expoline/Makefile3
-rw-r--r--arch/s390/lib/expoline/expoline.S12
-rw-r--r--arch/s390/lib/spinlock.c4
-rw-r--r--arch/s390/lib/test_modules.c32
-rw-r--r--arch/s390/lib/test_modules.h53
-rw-r--r--arch/s390/lib/test_modules_helpers.c13
-rw-r--r--arch/s390/lib/test_unwind.c316
-rw-r--r--arch/s390/lib/uaccess.c231
-rw-r--r--arch/s390/lib/xor.c21
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/cmm.c2
-rw-r--r--arch/s390/mm/dump_pagetables.c20
-rw-r--r--arch/s390/mm/extable.c81
-rw-r--r--arch/s390/mm/fault.c110
-rw-r--r--arch/s390/mm/gmap.c226
-rw-r--r--arch/s390/mm/hugetlbpage.c47
-rw-r--r--arch/s390/mm/init.c21
-rw-r--r--arch/s390/mm/kasan_init.c8
-rw-r--r--arch/s390/mm/maccess.c217
-rw-r--r--arch/s390/mm/mmap.c26
-rw-r--r--arch/s390/mm/page-states.c1
-rw-r--r--arch/s390/mm/pageattr.c33
-rw-r--r--arch/s390/mm/pgalloc.c252
-rw-r--r--arch/s390/mm/pgtable.c46
-rw-r--r--arch/s390/mm/vmem.c123
-rw-r--r--arch/s390/net/bpf_jit_comp.c62
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c25
-rw-r--r--arch/s390/pci/pci_bus.c82
-rw-r--r--arch/s390/pci/pci_bus.h7
-rw-r--r--arch/s390/pci/pci_clp.c19
-rw-r--r--arch/s390/pci/pci_debug.c2
-rw-r--r--arch/s390/pci/pci_dma.c28
-rw-r--r--arch/s390/pci/pci_event.c10
-rw-r--r--arch/s390/pci/pci_insn.c118
-rw-r--r--arch/s390/pci/pci_irq.c68
-rw-r--r--arch/s390/pci/pci_kvm_hook.c11
-rw-r--r--arch/s390/pci/pci_mmio.c9
-rw-r--r--arch/s390/purgatory/Makefile5
-rw-r--r--arch/s390/purgatory/head.S30
-rwxr-xr-xarch/s390/tools/gcc-thunk-extern.sh24
-rw-r--r--arch/s390/tools/gen_facilities.c9
-rw-r--r--arch/s390/tools/opcodes.txt3
254 files changed, 10223 insertions, 4232 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a9475cbc8c..de575af02ffe 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -55,11 +55,14 @@ config S390
# Note: keep this list sorted alphabetically
#
imply IMA_SECURE_AND_OR_TRUSTED_BOOT
+ select ALTERNATE_USER_ADDRESS_SPACE
select ARCH_32BIT_USTAT_F_TINODE
select ARCH_BINFMT_ELF_STATE
+ select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
@@ -120,14 +123,13 @@ config S390
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
- select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select DMA_OPS if PCI
select DYNAMIC_FTRACE if FUNCTION_TRACER
+ select GCC12_NO_ARRAY_BOUNDS
select GENERIC_ALLOCATOR
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
- select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
@@ -156,7 +158,7 @@ config S390
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
- select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+ select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
select HAVE_FENTRY
@@ -202,6 +204,7 @@ config S390
select IOMMU_SUPPORT if PCI
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_SG_DMA_LENGTH if PCI
@@ -231,20 +234,8 @@ source "kernel/livepatch/Kconfig"
menu "Processor type and features"
-config HAVE_MARCH_Z900_FEATURES
- def_bool n
-
-config HAVE_MARCH_Z990_FEATURES
- def_bool n
- select HAVE_MARCH_Z900_FEATURES
-
-config HAVE_MARCH_Z9_109_FEATURES
- def_bool n
- select HAVE_MARCH_Z990_FEATURES
-
config HAVE_MARCH_Z10_FEATURES
def_bool n
- select HAVE_MARCH_Z9_109_FEATURES
config HAVE_MARCH_Z196_FEATURES
def_bool n
@@ -266,45 +257,21 @@ config HAVE_MARCH_Z15_FEATURES
def_bool n
select HAVE_MARCH_Z14_FEATURES
+config HAVE_MARCH_Z16_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z15_FEATURES
+
choice
prompt "Processor type"
default MARCH_Z196
-config MARCH_Z900
- bool "IBM zSeries model z800 and z900"
- select HAVE_MARCH_Z900_FEATURES
- depends on $(cc-option,-march=z900)
- help
- Select this to enable optimizations for model z800/z900 (2064 and
- 2066 series). This will enable some optimizations that are not
- available on older ESA/390 (31 Bit) only CPUs.
-
-config MARCH_Z990
- bool "IBM zSeries model z890 and z990"
- select HAVE_MARCH_Z990_FEATURES
- depends on $(cc-option,-march=z990)
- help
- Select this to enable optimizations for model z890/z990 (2084 and
- 2086 series). The kernel will be slightly faster but will not work
- on older machines.
-
-config MARCH_Z9_109
- bool "IBM System z9"
- select HAVE_MARCH_Z9_109_FEATURES
- depends on $(cc-option,-march=z9-109)
- help
- Select this to enable optimizations for IBM System z9 (2094 and
- 2096 series). The kernel will be slightly faster but will not work
- on older machines.
-
config MARCH_Z10
bool "IBM System z10"
select HAVE_MARCH_Z10_FEATURES
depends on $(cc-option,-march=z10)
help
- Select this to enable optimizations for IBM System z10 (2097 and
- 2098 series). The kernel will be slightly faster but will not work
- on older machines.
+ Select this to enable optimizations for IBM System z10 (2097 and 2098
+ series). This is the oldest machine generation currently supported.
config MARCH_Z196
bool "IBM zEnterprise 114 and 196"
@@ -351,16 +318,15 @@ config MARCH_Z15
and 8561 series). The kernel will be slightly faster but will not
work on older machines.
-endchoice
-
-config MARCH_Z900_TUNE
- def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT
-
-config MARCH_Z990_TUNE
- def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT
+config MARCH_Z16
+ bool "IBM z16"
+ select HAVE_MARCH_Z16_FEATURES
+ depends on $(cc-option,-march=z16)
+ help
+ Select this to enable optimizations for IBM z16 (3931 and
+ 3932 series).
-config MARCH_Z9_109_TUNE
- def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT
+endchoice
config MARCH_Z10_TUNE
def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT
@@ -380,6 +346,9 @@ config MARCH_Z14_TUNE
config MARCH_Z15_TUNE
def_bool TUNE_Z15 || MARCH_Z15 && TUNE_DEFAULT
+config MARCH_Z16_TUNE
+ def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT
+
choice
prompt "Tune code generation"
default TUNE_DEFAULT
@@ -397,21 +366,8 @@ config TUNE_DEFAULT
Tune the generated code for the target processor for which the kernel
will be compiled.
-config TUNE_Z900
- bool "IBM zSeries model z800 and z900"
- depends on $(cc-option,-mtune=z900)
-
-config TUNE_Z990
- bool "IBM zSeries model z890 and z990"
- depends on $(cc-option,-mtune=z990)
-
-config TUNE_Z9_109
- bool "IBM System z9"
- depends on $(cc-option,-mtune=z9-109)
-
config TUNE_Z10
bool "IBM System z10"
- depends on $(cc-option,-mtune=z10)
config TUNE_Z196
bool "IBM zEnterprise 114 and 196"
@@ -433,6 +389,10 @@ config TUNE_Z15
bool "IBM z15"
depends on $(cc-option,-mtune=z15)
+config TUNE_Z16
+ bool "IBM z16"
+ depends on $(cc-option,-mtune=z16)
+
endchoice
config 64BIT
@@ -460,9 +420,6 @@ config COMPAT
(and some other stuff like libraries and such) is needed for
executing 31 bit applications. It is safe to say "Y".
-config SYSVIPC_COMPAT
- def_bool y if COMPAT && SYSVIPC
-
config SMP
def_bool y
@@ -528,7 +485,6 @@ config KEXEC
config KEXEC_FILE
bool "kexec file based system call"
select KEXEC_CORE
- select BUILD_BIN2C
depends on CRYPTO
depends on CRYPTO_SHA256
depends on CRYPTO_SHA256_S390
@@ -552,21 +508,6 @@ config KEXEC_SIG
verification for the corresponding kernel image type being
loaded in order for this to work.
-config ARCH_RANDOM
- def_bool y
- prompt "s390 architectural random number generation API"
- help
- Enable the s390 architectural random number generation API
- to provide random data for all consumers within the Linux
- kernel.
-
- When enabled the arch_random_* functions declared in linux/random.h
- are implemented. The implementation is based on the s390 CPACF
- instruction subfunction TRNG which provides a real true random
- number generator.
-
- If unsure, say Y.
-
config KERNEL_NOBP
def_bool n
prompt "Enable modified branch prediction for the kernel by default"
@@ -586,6 +527,7 @@ config KERNEL_NOBP
config EXPOLINE
def_bool n
+ depends on $(cc-option,-mindirect-branch=thunk)
prompt "Avoid speculative indirect branches in the kernel"
help
Compile the kernel with the expoline compiler options to guard
@@ -596,6 +538,19 @@ config EXPOLINE
If unsure, say N.
+config EXPOLINE_EXTERN
+ def_bool n
+ depends on EXPOLINE
+ depends on CC_IS_GCC && GCC_VERSION >= 110200
+ depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
+ prompt "Generate expolines as extern functions."
+ help
+ This option is required for some tooling like kpatch. The kernel is
+ compiled with -mindirect-branch=thunk-extern and requires a newer
+ compiler.
+
+ If unsure, say N.
+
choice
prompt "Expoline default"
depends on EXPOLINE
@@ -613,9 +568,7 @@ config EXPOLINE_FULL
endchoice
config RELOCATABLE
- bool "Build a relocatable kernel"
- select MODULE_REL_CRCS if MODVERSIONS
- default y
+ def_bool y
help
This builds a kernel image that retains relocation information
so it can be loaded at an arbitrary address.
@@ -624,10 +577,11 @@ config RELOCATABLE
bootup process.
The relocations make the kernel image about 15% larger (compressed
10%), but are discarded at runtime.
+ Note: this option exists only for documentation purposes, please do
+ not remove it.
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image (KASLR)"
- depends on RELOCATABLE
default y
help
In support of Kernel Address Space Layout Randomization (KASLR),
@@ -657,20 +611,6 @@ config MAX_PHYSMEM_BITS
Increasing the number of bits also increases the kernel image size.
By default 46 bits (64TB) are supported.
-config PACK_STACK
- def_bool y
- prompt "Pack kernel stack"
- help
- This option enables the compiler option -mkernel-backchain if it
- is available. If the option is available the compiler supports
- the new stack layout which dramatically reduces the minimum stack
- frame size. With an old compiler a non-leaf function needs a
- minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With
- -mkernel-backchain the minimum size drops to 16 byte on 31 bit
- and 24 byte on 64 bit.
-
- Say Y if you are unsure.
-
config CHECK_STACK
def_bool y
depends on !VMAP_STACK
@@ -778,11 +718,11 @@ config VFIO_AP
depends on S390_AP_IOMMU && VFIO_MDEV && KVM
depends on ZCRYPT
help
- This driver grants access to Adjunct Processor (AP) devices
- via the VFIO mediated device interface.
+ This driver grants access to Adjunct Processor (AP) devices
+ via the VFIO mediated device interface.
- To compile this driver as a module, choose M here: the module
- will be called vfio_ap.
+ To compile this driver as a module, choose M here: the module
+ will be called vfio_ap.
endmenu
@@ -814,7 +754,6 @@ menu "Virtualization"
config PROTECTED_VIRTUALIZATION_GUEST
def_bool n
prompt "Protected virtualization guest support"
- select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
help
Select this option, if you want to be able to run this
kernel as a protected virtualization KVM guest.
@@ -946,6 +885,9 @@ config S390_GUEST
endmenu
+config S390_MODULES_SANITY_TEST_HELPERS
+ def_bool n
+
menu "Selftests"
config S390_UNWIND_SELFTEST
@@ -972,4 +914,16 @@ config S390_KPROBES_SANITY_TEST
Say N if you are unsure.
+config S390_MODULES_SANITY_TEST
+ def_tristate n
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ prompt "Enable s390 specific modules tests"
+ select S390_MODULES_SANITY_TEST_HELPERS
+ help
+ This option enables an s390 specific modules test. This option is
+ not useful for distributions or general kernels, but only for
+ kernel developers working on architecture code.
+
+ Say N if you are unsure.
endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index e94a2a7f6bf4..c4300ea4abf8 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -14,9 +14,9 @@ config DEBUG_ENTRY
If unsure, say N.
config CIO_INJECT
- bool "CIO Inject interfaces"
- depends on DEBUG_KERNEL && DEBUG_FS
- help
- This option provides a debugging facility to inject certain artificial events
- and instruction responses to the CIO layer of Linux kernel. The newly created
- debugfs user-interfaces will be at /sys/kernel/debug/s390/cio/*
+ bool "CIO Inject interfaces"
+ depends on DEBUG_KERNEL && DEBUG_FS
+ help
+ This option provides a debugging facility to inject certain artificial events
+ and instruction responses to the CIO layer of Linux kernel. The newly created
+ debugfs user-interfaces will be at /sys/kernel/debug/s390/cio/*
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 609e3697324b..b3235ab0ace8 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,14 +14,14 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
-ifeq ($(CONFIG_RELOCATABLE),y)
KBUILD_CFLAGS += -fPIE
LDFLAGS_vmlinux := -pie
-endif
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
+ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
-KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2
+endif
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
@@ -30,36 +30,34 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
+
UTS_MACHINE := s390x
STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
CHECKFLAGS += -D__s390__ -D__s390x__
export LD_BFD
-mflags-$(CONFIG_MARCH_Z900) := -march=z900
-mflags-$(CONFIG_MARCH_Z990) := -march=z990
-mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
mflags-$(CONFIG_MARCH_Z10) := -march=z10
mflags-$(CONFIG_MARCH_Z196) := -march=z196
mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
mflags-$(CONFIG_MARCH_Z13) := -march=z13
mflags-$(CONFIG_MARCH_Z14) := -march=z14
mflags-$(CONFIG_MARCH_Z15) := -march=z15
+mflags-$(CONFIG_MARCH_Z16) := -march=z16
export CC_FLAGS_MARCH := $(mflags-y)
aflags-y += $(mflags-y)
cflags-y += $(mflags-y)
-cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900
-cflags-$(CONFIG_MARCH_Z990_TUNE) += -mtune=z990
-cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109
cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10
cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196
cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12
cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13
cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14
cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15
+cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16
cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
@@ -68,11 +66,6 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
#
cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
-ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),)
-cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK
-aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
-endif
-
KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
@@ -86,14 +79,18 @@ ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),)
endif
ifdef CONFIG_EXPOLINE
- ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),)
+ ifdef CONFIG_EXPOLINE_EXTERN
+ KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
+ CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
+ CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
+ else
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk
CC_FLAGS_EXPOLINE += -mfunction-return=thunk
- CC_FLAGS_EXPOLINE += -mindirect-branch-table
- export CC_FLAGS_EXPOLINE
- cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
- aflags-y += -DCC_USING_EXPOLINE
endif
+ CC_FLAGS_EXPOLINE += -mindirect-branch-table
+ export CC_FLAGS_EXPOLINE
+ cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
+ aflags-y += -DCC_USING_EXPOLINE
endif
ifdef CONFIG_FUNCTION_TRACER
@@ -111,7 +108,7 @@ endif
# Test CFI features of binutils
cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1)
-KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y)
+KBUILD_CFLAGS += -mpacked-stack -mbackchain -msoft-float $(cflags-y)
KBUILD_CFLAGS += -pipe -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi)
KBUILD_AFLAGS += $(aflags-y) $(cfi)
@@ -120,8 +117,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR
OBJCOPYFLAGS := -O binary
-head-y := arch/s390/kernel/head64.o
-
libs-y += arch/s390/lib/
drivers-y += drivers/s390/
@@ -135,8 +130,7 @@ all: bzImage
KBUILD_IMAGE := $(boot)/bzImage
install:
- sh -x $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \
- System.map "$(INSTALL_PATH)"
+ $(call cmd,install)
bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@@ -165,6 +159,12 @@ vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+
+ifdef CONFIG_EXPOLINE_EXTERN
+modules_prepare: expoline_prepare
+expoline_prepare:
+ $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
+endif
endif
# Don't use tabs in echo arguments
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index b265bfede188..f56591bc0897 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -2,3 +2,6 @@
image
bzImage
section_cmp.*
+vmlinux
+vmlinux.lds
+vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ba646899131..d52c3e2e16bc 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,14 +37,20 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
-obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
-targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
-subdir- := compressed
+obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-all := $(obj-y) piggy.o syms.o
+
+targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
+targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
OBJECTS := $(addprefix $(obj)/,$(obj-y))
+OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
quiet_cmd_section_cmp = SECTCMP $*
define cmd_section_cmp
@@ -59,14 +65,67 @@ define cmd_section_cmp
touch $@
endef
-$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
+$(obj)/bzImage: $(obj)/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
$(call if_changed,objcopy)
-$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE
+$(obj)/section_cmp%: vmlinux $(obj)/vmlinux FORCE
$(call if_changed,section_cmp)
-$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
- $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS_ALL) FORCE
+ $(call if_changed,ld)
+
+LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(OBJECTS) FORCE
+ $(call if_changed,ld)
+
+quiet_cmd_dumpsyms = DUMPSYMS $<
+define cmd_dumpsyms
+ $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
+endef
+
+$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
+ $(call if_changed,dumpsyms)
+
+OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
+$(obj)/syms.o: $(obj)/syms.bin FORCE
+ $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
+$(obj)/info.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
+$(obj)/info.o: $(obj)/info.bin FORCE
+ $(call if_changed,objcopy)
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+suffix-$(CONFIG_KERNEL_GZIP) := .gz
+suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
+suffix-$(CONFIG_KERNEL_LZ4) := .lz4
+suffix-$(CONFIG_KERNEL_LZMA) := .lzma
+suffix-$(CONFIG_KERNEL_LZO) := .lzo
+suffix-$(CONFIG_KERNEL_XZ) := .xz
+suffix-$(CONFIG_KERNEL_ZSTD) := .zst
-$(obj)/startup.a: $(OBJECTS) FORCE
- $(call if_changed,ar)
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2_with_size)
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lz4_with_size)
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzma_with_size)
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzo_with_size)
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,xzkern_with_size)
+$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,zstd22_with_size)
+
+OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
+$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
+ $(call if_changed,objcopy)
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 641ce0fc5c3e..70418389414d 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -2,9 +2,12 @@
#ifndef BOOT_BOOT_H
#define BOOT_BOOT_H
-#include <asm/extable.h>
#include <linux/types.h>
+#define IPL_START 0x200
+
+#ifndef __ASSEMBLY__
+
void startup_kernel(void);
unsigned long detect_memory(void);
bool is_ipl_block_dump(void);
@@ -31,4 +34,5 @@ extern char _stack_start[], _stack_end[];
unsigned long read_ipl_report(unsigned long safe_offset);
+#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/clz_ctz.c
index c3ebf248596b..c3ebf248596b 100644
--- a/arch/s390/boot/compressed/clz_ctz.c
+++ b/arch/s390/boot/clz_ctz.c
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
deleted file mode 100644
index 01d93832cf4a..000000000000
--- a/arch/s390/boot/compressed/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-vmlinux
-vmlinux.lds
-vmlinux.syms
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
deleted file mode 100644
index 3b860061e84d..000000000000
--- a/arch/s390/boot/compressed/Makefile
+++ /dev/null
@@ -1,88 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# linux/arch/s390/boot/compressed/Makefile
-#
-# create a compressed vmlinux image from the original vmlinux
-#
-
-KCOV_INSTRUMENT := n
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
-obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
-obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
-obj-all := $(obj-y) piggy.o syms.o
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
-targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
-targets += vmlinux.bin.zst
-targets += info.bin syms.bin vmlinux.syms $(obj-all)
-
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
-KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
-OBJCOPYFLAGS :=
-
-OBJECTS := $(addprefix $(obj)/,$(obj-y))
-OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
-
-LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup --build-id=sha1 -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS_ALL) FORCE
- $(call if_changed,ld)
-
-LDFLAGS_vmlinux.syms := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux.syms: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
- $(call if_changed,ld)
-
-quiet_cmd_dumpsyms = DUMPSYMS $<
-define cmd_dumpsyms
- $(NM) -n -S --format=bsd "$<" | sed -nE 's/^0*([0-9a-fA-F]+) 0*([0-9a-fA-F]+) [tT] ([^ ]*)$$/\1 \2 \3/p' | tr '\n' '\0' > "$@"
-endef
-
-$(obj)/syms.bin: $(obj)/vmlinux.syms FORCE
- $(call if_changed,dumpsyms)
-
-OBJCOPYFLAGS_syms.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.decompressor.syms
-$(obj)/syms.o: $(obj)/syms.bin FORCE
- $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
-$(obj)/info.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_info.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.info
-$(obj)/info.o: $(obj)/info.bin FORCE
- $(call if_changed,objcopy)
-
-OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section=.vmlinux.info -S
-$(obj)/vmlinux.bin: vmlinux FORCE
- $(call if_changed,objcopy)
-
-vmlinux.bin.all-y := $(obj)/vmlinux.bin
-
-suffix-$(CONFIG_KERNEL_GZIP) := .gz
-suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
-suffix-$(CONFIG_KERNEL_LZ4) := .lz4
-suffix-$(CONFIG_KERNEL_LZMA) := .lzma
-suffix-$(CONFIG_KERNEL_LZO) := .lzo
-suffix-$(CONFIG_KERNEL_XZ) := .xz
-suffix-$(CONFIG_KERNEL_ZSTD) := .zst
-
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,xzkern)
-$(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE
- $(call if_changed,zstd22)
-
-OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
-$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
- $(call if_changed,objcopy)
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/decompressor.c
index e27c2140d620..e27c2140d620 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/decompressor.c
diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/decompressor.h
index f75cc31a77dd..f75cc31a77dd 100644
--- a/arch/s390/boot/compressed/decompressor.h
+++ b/arch/s390/boot/decompressor.h
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 3a252d140c55..3f79b9efb803 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -5,7 +5,6 @@
* Author(s): Hartmut Penner <hp@de.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
* Rob van der Heij <rvdhei@iae.nl>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
*
* There are 5 different IPL methods
* 1) load the image directly into ram at address 0 and do an PSW restart
@@ -28,234 +27,181 @@
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sclp.h>
-
-#define ARCH_OFFSET 4
+#include "boot.h"
#define EP_OFFSET 0x10008
#define EP_STRING "S390EP"
+#define IPL_BS 0x730
__HEAD
-
-#define IPL_BS 0x730
- .org 0
- .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
- .long 0x02000018,0x60000050 # by ipl to addresses 0-23.
- .long 0x02000068,0x60000050 # (a PSW and two CCWs).
- .fill 80-24,1,0x40 # bytes 24-79 are discarded !!
- .long 0x020000f0,0x60000050 # The next 160 byte are loaded
- .long 0x02000140,0x60000050 # to addresses 0x18-0xb7
- .long 0x02000190,0x60000050 # They form the continuation
- .long 0x020001e0,0x60000050 # of the CCW program started
- .long 0x02000230,0x60000050 # by ipl and load the range
- .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image
- .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730
- .long 0x02000320,0x60000050 # in memory. At the end of
- .long 0x02000370,0x60000050 # the channel program the PSW
- .long 0x020003c0,0x60000050 # at location 0 is loaded.
- .long 0x02000410,0x60000050 # Initial processing starts
- .long 0x02000460,0x60000050 # at 0x200 = iplstart.
- .long 0x020004b0,0x60000050
- .long 0x02000500,0x60000050
- .long 0x02000550,0x60000050
- .long 0x020005a0,0x60000050
- .long 0x020005f0,0x60000050
- .long 0x02000640,0x60000050
- .long 0x02000690,0x60000050
- .long 0x020006e0,0x20000050
-
- .org __LC_RST_NEW_PSW # 0x1a0
- .quad 0,iplstart
- .org __LC_EXT_NEW_PSW # 0x1b0
- .quad 0x0002000180000000,0x1b0 # disabled wait
- .org __LC_PGM_NEW_PSW # 0x1d0
- .quad 0x0000000180000000,startup_pgm_check_handler
- .org __LC_IO_NEW_PSW # 0x1f0
- .quad 0x0002000180000000,0x1f0 # disabled wait
-
- .org 0x200
-
+ipl_start:
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ sam64 # switch to 64 bit addressing mode
+ lgh %r1,__LC_SUBCHANNEL_ID # test if subchannel number
+ brctg %r1,.Lnoload # is valid
+ llgf %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number
+ lghi %r2,IPL_BS # load start address
+ bras %r14,.Lloader # load rest of ipl image
+ larl %r12,parmarea # pointer to parameter area
+ stg %r1,IPL_DEVICE-PARMAREA(%r12) # save ipl device number
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+ larl %r2,_end # ramdisk loc. is temp
+ bras %r14,.Lloader # load parameter file
+ ltgr %r2,%r2 # got anything ?
+ jz .Lnopf
+ lg %r3,MAX_COMMAND_LINE_SIZE-PARMAREA(%r12)
+ aghi %r3,-1
+ clgr %r2,%r3
+ jl .Lnotrunc
+ lgr %r2,%r3
+.Lnotrunc:
+ larl %r4,_end
+ larl %r13,.L_hdr
+ clc 0(3,%r4),0(%r13) # if it is HDRx
+ jz .Lagain1 # skip dataset header
+ larl %r13,.L_eof
+ clc 0(3,%r4),0(%r13) # if it is EOFx
+ jz .Lagain1 # skip dateset trailer
+ lgr %r5,%r2
+ la %r6,COMMAND_LINE-PARMAREA(%r12)
+ lgr %r7,%r2
+ aghi %r7,1
+ mvcl %r6,%r4
+.Lnopf:
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+ larl %r2,_end # addr of ramdisk
+ stg %r2,INITRD_START-PARMAREA(%r12)
+ bras %r14,.Lloader # load ramdisk
+ stg %r2,INITRD_SIZE-PARMAREA(%r12) # store size of rd
+ ltgr %r2,%r2
+ jnz .Lrdcont
+ stg %r2,INITRD_START-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+ larl %r2,_end
+ larl %r13,.L_hdr # skip HDRx and EOFx
+ clc 0(3,%r2),0(%r13)
+ jz .Lagain2
+ larl %r13,.L_eof
+ clc 0(3,%r2),0(%r13)
+ jz .Lagain2
+#
+# reset files in VM reader
+#
+ larl %r13,.Lcpuid
+ stidp 0(%r13) # store cpuid
+ tm 0(%r13),0xff # running VM ?
+ jno .Lnoreset
+ larl %r2,.Lreset
+ lghi %r3,26
+ diag %r2,%r3,8
+ larl %r5,.Lirb
+ stsch 0(%r5) # check if irq is pending
+ tm 30(%r5),0x0f # by verifying if any of the
+ jnz .Lwaitforirq # activity or status control
+ tm 31(%r5),0xff # bits is set in the schib
+ jz .Lnoreset
+.Lwaitforirq:
+ bras %r14,.Lirqwait # wait for IO interrupt
+ c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
+ jne .Lwaitforirq
+ larl %r5,.Lirb
+ tsch 0(%r5)
+.Lnoreset:
+ j .Lnoload
+#
+# everything loaded, go for it
+#
+.Lnoload:
+ jg startup
#
# subroutine to wait for end I/O
#
.Lirqwait:
- mvc __LC_IO_NEW_PSW(16),.Lnewpsw # set up IO interrupt psw
- lpsw .Lwaitpsw
+ larl %r13,.Lnewpswmask # set up IO interrupt psw
+ mvc __LC_IO_NEW_PSW(8),0(%r13)
+ stg %r14,__LC_IO_NEW_PSW+8
+ larl %r13,.Lwaitpsw
+ lpswe 0(%r13)
.Lioint:
- br %r14
- .align 8
-.Lnewpsw:
- .quad 0x0000000080000000,.Lioint
-.Lwaitpsw:
- .long 0x020a0000,0x80000000+.Lioint
-
#
# subroutine for loading cards from the reader
#
.Lloader:
- la %r4,0(%r14)
- la %r3,.Lorb # r2 = address of orb into r2
- la %r5,.Lirb # r4 = address of irb
- la %r6,.Lccws
- la %r7,20
+ lgr %r4,%r14
+ larl %r3,.Lorb # r2 = address of orb into r2
+ larl %r5,.Lirb # r4 = address of irb
+ larl %r6,.Lccws
+ lghi %r7,20
.Linit:
st %r2,4(%r6) # initialize CCW data addresses
la %r2,0x50(%r2)
la %r6,8(%r6)
- bct 7,.Linit
-
- lctl %c6,%c6,.Lcr6 # set IO subclass mask
- slr %r2,%r2
+ brctg %r7,.Linit
+ larl %r13,.Lcr6
+ lctlg %c6,%c6,0(%r13)
+ xgr %r2,%r2
.Lldlp:
ssch 0(%r3) # load chunk of 1600 bytes
- bnz .Llderr
+ jnz .Llderr
.Lwait4irq:
- bas %r14,.Lirqwait
+ bras %r14,.Lirqwait
c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
- bne .Lwait4irq
+ jne .Lwait4irq
tsch 0(%r5)
-
- slr %r0,%r0
+ xgr %r0,%r0
ic %r0,8(%r5) # get device status
- chi %r0,8 # channel end ?
- be .Lcont
- chi %r0,12 # channel end + device end ?
- be .Lcont
-
- l %r0,4(%r5)
- s %r0,8(%r3) # r0/8 = number of ccws executed
- mhi %r0,10 # *10 = number of bytes in ccws
- lh %r3,10(%r5) # get residual count
- sr %r0,%r3 # #ccws*80-residual=#bytes read
- ar %r2,%r0
-
+ cghi %r0,8 # channel end ?
+ je .Lcont
+ cghi %r0,12 # channel end + device end ?
+ je .Lcont
+ llgf %r0,4(%r5)
+ sgf %r0,8(%r3) # r0/8 = number of ccws executed
+ mghi %r0,10 # *10 = number of bytes in ccws
+ llgh %r3,10(%r5) # get residual count
+ sgr %r0,%r3 # #ccws*80-residual=#bytes read
+ agr %r2,%r0
br %r4 # r2 contains the total size
-
.Lcont:
- ahi %r2,0x640 # add 0x640 to total size
- la %r6,.Lccws
- la %r7,20
+ aghi %r2,0x640 # add 0x640 to total size
+ larl %r6,.Lccws
+ lghi %r7,20
.Lincr:
l %r0,4(%r6) # update CCW data addresses
- ahi %r0,0x640
+ aghi %r0,0x640
st %r0,4(%r6)
- ahi %r6,8
- bct 7,.Lincr
-
- b .Lldlp
+ aghi %r6,8
+ brctg %r7,.Lincr
+ j .Lldlp
.Llderr:
- lpsw .Lcrash
+ larl %r13,.Lcrash
+ lpsw 0(%r13)
.align 8
+.Lwaitpsw:
+ .quad 0x0202000180000000,.Lioint
+.Lnewpswmask:
+ .quad 0x0000000180000000
+ .align 8
.Lorb: .long 0x00000000,0x0080ff00,.Lccws
.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.Lcr6: .long 0xff000000
-.Lloadp:.long 0,0
+ .align 8
+.Lcr6: .quad 0x00000000ff000000
.align 8
.Lcrash:.long 0x000a0000,0x00000000
-
.align 8
.Lccws: .rept 19
.long 0x02600050,0x00000000
.endr
.long 0x02200050,0x00000000
-
-iplstart:
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
- lh %r1,__LC_SUBCHANNEL_ID # test if subchannel number
- bct %r1,.Lnoload # is valid
- l %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number
- la %r2,IPL_BS # load start address
- bas %r14,.Lloader # load rest of ipl image
- l %r12,.Lparm # pointer to parameter area
- st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
-
-#
-# load parameter file from ipl device
-#
-.Lagain1:
- l %r2,.Linitrd # ramdisk loc. is temp
- bas %r14,.Lloader # load parameter file
- ltr %r2,%r2 # got anything ?
- bz .Lnopf
- l %r3,MAX_COMMAND_LINE_SIZE+ARCH_OFFSET-PARMAREA(%r12)
- ahi %r3,-1
- clr %r2,%r3
- bl .Lnotrunc
- lr %r2,%r3
-.Lnotrunc:
- l %r4,.Linitrd
- clc 0(3,%r4),.L_hdr # if it is HDRx
- bz .Lagain1 # skip dataset header
- clc 0(3,%r4),.L_eof # if it is EOFx
- bz .Lagain1 # skip dateset trailer
-
- lr %r5,%r2
- la %r6,COMMAND_LINE-PARMAREA(%r12)
- lr %r7,%r2
- ahi %r7,1
- mvcl %r6,%r4
-.Lnopf:
-
-#
-# load ramdisk from ipl device
-#
-.Lagain2:
- l %r2,.Linitrd # addr of ramdisk
- st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
- bas %r14,.Lloader # load ramdisk
- st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
- ltr %r2,%r2
- bnz .Lrdcont
- st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
-.Lrdcont:
- l %r2,.Linitrd
-
- clc 0(3,%r2),.L_hdr # skip HDRx and EOFx
- bz .Lagain2
- clc 0(3,%r2),.L_eof
- bz .Lagain2
-
-#
-# reset files in VM reader
-#
- stidp .Lcpuid # store cpuid
- tm .Lcpuid,0xff # running VM ?
- bno .Lnoreset
- la %r2,.Lreset
- lhi %r3,26
- diag %r2,%r3,8
- la %r5,.Lirb
- stsch 0(%r5) # check if irq is pending
- tm 30(%r5),0x0f # by verifying if any of the
- bnz .Lwaitforirq # activity or status control
- tm 31(%r5),0xff # bits is set in the schib
- bz .Lnoreset
-.Lwaitforirq:
- bas %r14,.Lirqwait # wait for IO interrupt
- c %r1,__LC_SUBCHANNEL_ID # compare subchannel number
- bne .Lwaitforirq
- la %r5,.Lirb
- tsch 0(%r5)
-.Lnoreset:
- b .Lnoload
-
-#
-# everything loaded, go for it
-#
-.Lnoload:
- l %r1,.Lstartup
- br %r1
-
-.Linitrd:.long _end # default address of initrd
-.Lparm: .long PARMAREA
-.Lstartup: .long startup
.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
.byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
.byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
@@ -269,10 +215,10 @@ iplstart:
# this is called either by the ipl loader or directly by PSW restart
# or linload or SALIPL
#
- .org STARTUP_NORMAL_OFFSET
+ .org STARTUP_NORMAL_OFFSET - IPL_START
SYM_CODE_START(startup)
j startup_normal
- .org EP_OFFSET
+ .org EP_OFFSET - IPL_START
#
# This is a list of s390 kernel entry points. At address 0x1000f the number of
# valid entry points is stored.
@@ -284,7 +230,7 @@ SYM_CODE_START(startup)
#
# kdump startup-code, running in 64 bit absolute addressing mode
#
- .org STARTUP_KDUMP_OFFSET
+ .org STARTUP_KDUMP_OFFSET - IPL_START
j startup_kdump
SYM_CODE_END(startup)
SYM_CODE_START_LOCAL(startup_normal)
@@ -296,20 +242,23 @@ SYM_CODE_START_LOCAL(startup_normal)
.fill 16,4,0x0
0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
sam64 # switch to 64 bit addressing mode
- basr %r13,0 # get base
-.LPG0:
- mvc __LC_EXT_NEW_PSW(16),.Lext_new_psw-.LPG0(%r13)
- mvc __LC_PGM_NEW_PSW(16),.Lpgm_new_psw-.LPG0(%r13)
- mvc __LC_IO_NEW_PSW(16),.Lio_new_psw-.LPG0(%r13)
+ larl %r13,.Lext_new_psw
+ mvc __LC_EXT_NEW_PSW(16),0(%r13)
+ larl %r13,.Lpgm_new_psw
+ mvc __LC_PGM_NEW_PSW(16),0(%r13)
+ larl %r13,.Lio_new_psw
+ mvc __LC_IO_NEW_PSW(16),0(%r13)
xc 0x200(256),0x200 # partially clear lowcore
xc 0x300(256),0x300
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
- lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers
+ larl %r13,.Lctl
+ lctlg %c0,%c15,0(%r13) # load control registers
stcke __LC_BOOT_CLOCK
mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
- spt 6f-.LPG0(%r13)
- mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+ larl %r13,6f
+ spt 0(%r13)
+ mvc __LC_LAST_UPDATE_TIMER(8),0(%r13)
larl %r15,_stack_end-STACK_FRAME_OVERHEAD
brasl %r14,sclp_early_setup_buffer
brasl %r14,verify_facilities
@@ -369,23 +318,3 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
lpswe __LC_RETURN_PSW # disabled wait
SYM_CODE_END(startup_pgm_check_handler)
-
-#
-# params at 10400 (setup.h)
-# Must be keept in sync with struct parmarea in setup.h
-#
- .org PARMAREA
-SYM_DATA_START(parmarea)
- .quad 0 # IPL_DEVICE
- .quad 0 # INITRD_START
- .quad 0 # INITRD_SIZE
- .quad 0 # OLDMEM_BASE
- .quad 0 # OLDMEM_SIZE
- .quad kernel_version # points to kernel version string
- .quad COMMAND_LINE_SIZE
-
- .org COMMAND_LINE
- .byte "root=/dev/ram0 ro"
- .byte 0
- .org PARMAREA+__PARMAREA_SIZE
-SYM_DATA_END(parmarea)
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index 515b27a996b3..616ba1660f08 100644..100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -14,12 +14,6 @@
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-#
-
-# User may have a custom install script
-
-if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
-if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
"bootloader config required" >&2
diff --git a/arch/s390/boot/ipl_data.c b/arch/s390/boot/ipl_data.c
new file mode 100644
index 000000000000..0846e2b249c6
--- /dev/null
+++ b/arch/s390/boot/ipl_data.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/compat.h>
+#include <linux/ptrace.h>
+#include <asm/cio.h>
+#include <asm/asm-offsets.h>
+#include "boot.h"
+
+#define CCW0(cmd, addr, cnt, flg) \
+ { .cmd_code = cmd, .cda = addr, .count = cnt, .flags = flg, }
+
+#define PSW_MASK_DISABLED (PSW_MASK_WAIT | PSW_MASK_EA | PSW_MASK_BA)
+
+struct ipl_lowcore {
+ psw_t32 ipl_psw; /* 0x0000 */
+ struct ccw0 ccwpgm[2]; /* 0x0008 */
+ u8 fill[56]; /* 0x0018 */
+ struct ccw0 ccwpgmcc[20]; /* 0x0050 */
+ u8 pad_0xf0[0x01a0-0x00f0]; /* 0x00f0 */
+ psw_t restart_psw; /* 0x01a0 */
+ psw_t external_new_psw; /* 0x01b0 */
+ psw_t svc_new_psw; /* 0x01c0 */
+ psw_t program_new_psw; /* 0x01d0 */
+ psw_t mcck_new_psw; /* 0x01e0 */
+ psw_t io_new_psw; /* 0x01f0 */
+};
+
+/*
+ * Initial lowcore for IPL: the first 24 bytes are loaded by IPL to
+ * addresses 0-23 (a PSW and two CCWs). Bytes 24-79 are discarded.
+ * The next 160 bytes are loaded to addresses 0x18-0xb7. They form
+ * the continuation of the CCW program started by IPL and load the
+ * range 0x0f0-0x730 from the image to the range 0x0f0-0x730 in
+ * memory. At the end of the channel program the PSW at location 0 is
+ * loaded.
+ * Initial processing starts at 0x200 = iplstart.
+ *
+ * The restart psw points to iplstart which allows to load a kernel
+ * image into memory and starting it by a psw restart on any cpu. All
+ * other default psw new locations contain a disabled wait psw where
+ * the address indicates which psw was loaded.
+ *
+ * Note that the 'file' utility can detect s390 kernel images. For
+ * that to succeed the two initial CCWs, and the 0x40 fill bytes must
+ * be present.
+ */
+static struct ipl_lowcore ipl_lowcore __used __section(".ipldata") = {
+ .ipl_psw = { .mask = PSW32_MASK_BASE, .addr = PSW32_ADDR_AMODE | IPL_START },
+ .ccwpgm = {
+ [ 0] = CCW0(CCW_CMD_READ_IPL, 0x018, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 1] = CCW0(CCW_CMD_READ_IPL, 0x068, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ },
+ .fill = {
+ [ 0 ... 55] = 0x40,
+ },
+ .ccwpgmcc = {
+ [ 0] = CCW0(CCW_CMD_READ_IPL, 0x0f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 1] = CCW0(CCW_CMD_READ_IPL, 0x140, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 2] = CCW0(CCW_CMD_READ_IPL, 0x190, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 3] = CCW0(CCW_CMD_READ_IPL, 0x1e0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 4] = CCW0(CCW_CMD_READ_IPL, 0x230, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 5] = CCW0(CCW_CMD_READ_IPL, 0x280, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 6] = CCW0(CCW_CMD_READ_IPL, 0x2d0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 7] = CCW0(CCW_CMD_READ_IPL, 0x320, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 8] = CCW0(CCW_CMD_READ_IPL, 0x370, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [ 9] = CCW0(CCW_CMD_READ_IPL, 0x3c0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [10] = CCW0(CCW_CMD_READ_IPL, 0x410, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [11] = CCW0(CCW_CMD_READ_IPL, 0x460, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [12] = CCW0(CCW_CMD_READ_IPL, 0x4b0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [13] = CCW0(CCW_CMD_READ_IPL, 0x500, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [14] = CCW0(CCW_CMD_READ_IPL, 0x550, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [15] = CCW0(CCW_CMD_READ_IPL, 0x5a0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [16] = CCW0(CCW_CMD_READ_IPL, 0x5f0, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [17] = CCW0(CCW_CMD_READ_IPL, 0x640, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [18] = CCW0(CCW_CMD_READ_IPL, 0x690, 0x50, CCW_FLAG_SLI | CCW_FLAG_CC),
+ [19] = CCW0(CCW_CMD_READ_IPL, 0x6e0, 0x50, CCW_FLAG_SLI),
+ },
+ .restart_psw = { .mask = 0, .addr = IPL_START, },
+ .external_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_EXT_NEW_PSW, },
+ .svc_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_SVC_NEW_PSW, },
+ .program_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_PGM_NEW_PSW, },
+ .mcck_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_MCK_NEW_PSW, },
+ .io_new_psw = { .mask = PSW_MASK_DISABLED, .addr = __LC_IO_NEW_PSW, },
+};
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 9ed7e29c81d9..ca78d6162245 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -8,9 +8,16 @@
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/facility.h>
+#include <asm/setup.h>
#include <asm/uv.h>
#include "boot.h"
+struct parmarea parmarea __section(".parmarea") = {
+ .kernel_version = (unsigned long)kernel_version,
+ .max_command_line_size = COMMAND_LINE_SIZE,
+ .command_line = "root=/dev/ram0 ro",
+};
+
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
int __bootdata(noexec_disabled);
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index d8984462071f..e8d74d4f62aa 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -8,7 +8,7 @@
#include <asm/timex.h>
#include <asm/sclp.h>
#include <asm/kasan.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
#include "boot.h"
#define PRNG_MODE_TDES 1
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 2f949cd9076b..7fa1a32ea0f3 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -7,7 +7,7 @@
#include <asm/sections.h>
#include <asm/mem_detect.h>
#include <asm/sparsemem.h>
-#include "compressed/decompressor.h"
+#include "decompressor.h"
#include "boot.h"
struct mem_detect_info __bootdata(mem_detect);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 1aa11a8f57dd..47ca3264c023 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -10,11 +10,14 @@
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
-#include "compressed/decompressor.h"
+#include <asm/abs_lowcore.h>
+#include "decompressor.h"
#include "boot.h"
#include "uv.h"
unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned long __bootdata_preserved(__abs_lowcore);
+unsigned long __bootdata_preserved(__memcpy_real_area);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -152,6 +155,7 @@ static void setup_kernel_memory_layout(void)
unsigned long vmemmap_start;
unsigned long rte_size;
unsigned long pages;
+ unsigned long vmax;
pages = ident_map_size / PAGE_SIZE;
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
@@ -163,10 +167,10 @@ static void setup_kernel_memory_layout(void)
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- MODULES_END = _REGION1_SIZE;
+ vmax = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- MODULES_END = _REGION2_SIZE;
+ vmax = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
@@ -174,11 +178,15 @@ static void setup_kernel_memory_layout(void)
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- adjust_to_uv_max(&MODULES_END);
+ vmax = adjust_to_uv_max(vmax);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
- MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ vmax = min(vmax, KASAN_SHADOW_START);
#endif
+ __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
+ __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+ sizeof(struct lowcore));
+ MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
@@ -283,8 +291,7 @@ void startup_kernel(void)
clear_bss_section();
copy_bootdata();
- if (IS_ENABLED(CONFIG_RELOCATABLE))
- handle_relocs(__kaslr_offset);
+ handle_relocs(__kaslr_offset);
if (__kaslr_offset) {
/*
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index e6be155ab2e5..0a077c0a2056 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -41,6 +41,12 @@ void uv_query_info(void)
uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
uv_info.uv_feature_indications = uvcb.uv_feature_indications;
+ uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions;
+ uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
+ uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
+ uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
+ uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
+ uv_info.supp_att_pflags = uvcb.supp_att_pflags;
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
@@ -51,10 +57,11 @@ void uv_query_info(void)
}
#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax)
+unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
- *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+ limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr);
+ return limit;
}
static int is_prot_virt_host_capable(void)
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 690ce019af5a..0f3070856f8d 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -3,10 +3,13 @@
#define BOOT_UV_H
#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax);
+unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
#else
-static inline void adjust_to_uv_max(unsigned long *vmax) {}
+static inline unsigned long adjust_to_uv_max(unsigned long limit)
+{
+ return limit;
+}
static inline void sanitize_prot_virt_host(void) {}
#endif
diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c
index d32e58bdda6a..fd32f038777f 100644
--- a/arch/s390/boot/version.c
+++ b/arch/s390/boot/version.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <generated/utsversion.h>
#include <generated/utsrelease.h>
#include <generated/compile.h>
#include "boot.h"
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 918e05137d4c..fa9d33b01b85 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -4,6 +4,7 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/sclp.h>
+#include "boot.h"
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
@@ -13,11 +14,19 @@ ENTRY(startup)
SECTIONS
{
. = 0;
+ .ipldata : {
+ *(.ipldata)
+ }
+ . = IPL_START;
.head.text : {
_head = . ;
HEAD_TEXT
_ehead = . ;
}
+ . = PARMAREA;
+ .parmarea : {
+ *(.parmarea)
+ }
.text : {
_text = .; /* Text */
*(.text)
@@ -93,8 +102,17 @@ SECTIONS
_compressed_start = .;
*(.vmlinux.bin.compressed)
_compressed_end = .;
- FILL(0xff);
- . = ALIGN(4096);
+ }
+
+#define SB_TRAILER_SIZE 32
+ /* Trailer needed for Secure Boot */
+ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
+ . = ALIGN(4096) - SB_TRAILER_SIZE;
+ .sb.trailer : {
+ QUAD(0)
+ QUAD(0)
+ QUAD(0)
+ QUAD(0x000000207a49504c)
}
_end = .;
diff --git a/arch/s390/configs/btf.config b/arch/s390/configs/btf.config
new file mode 100644
index 000000000000..39227b4511af
--- /dev/null
+++ b/arch/s390/configs/btf.config
@@ -0,0 +1 @@
+CONFIG_DEBUG_INFO_BTF=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index b626bc6e0eaf..63807bd0b536 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -40,8 +40,6 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_ZEC12=y
@@ -63,6 +61,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
@@ -73,6 +72,7 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG_SHA256=y
@@ -92,24 +92,25 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_SLUB_STATS=y
+# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=y
-CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -167,6 +168,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
CONFIG_NF_CONNTRACK_TIMESTAMP=y
@@ -185,7 +187,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -391,6 +392,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -400,6 +402,7 @@ CONFIG_PCI_IOV=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -492,20 +495,22 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_ASIX is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CADENCE is not set
# CONFIG_NET_VENDOR_CAVIUM is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_CISCO is not set
# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DAVICOM is not set
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUNGIBLE is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MICROSOFT is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_LITEX is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
@@ -514,16 +519,18 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETERION is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
-# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NVIDIA is not set
# CONFIG_NET_VENDOR_OKI is not set
# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
# CONFIG_NET_VENDOR_PENSANDO is not set
# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RDC is not set
# CONFIG_NET_VENDOR_REALTEK is not set
@@ -531,9 +538,9 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_ROCKER is not set
# CONFIG_NET_VENDOR_SAMSUNG is not set
# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SILAN is not set
# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SMSC is not set
# CONFIG_NET_VENDOR_SOCIONEXT is not set
# CONFIG_NET_VENDOR_STMICRO is not set
@@ -541,6 +548,7 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -565,6 +573,8 @@ CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
+# CONFIG_RANDOM_TRUST_CPU is not set
+# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
CONFIG_PPS=m
# CONFIG_PTP_1588_CLOCK is not set
# CONFIG_HWMON is not set
@@ -585,6 +595,7 @@ CONFIG_MLX5_INFINIBAND=m
CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
+CONFIG_MLX5_VFIO_PCI=m
CONFIG_VFIO_MDEV=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
@@ -686,6 +697,7 @@ CONFIG_ENCRYPTED_KEYS=m
CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
@@ -711,28 +723,9 @@ CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_BLAKE2S=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
@@ -742,9 +735,30 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -755,23 +769,23 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_ZCRYPT=m
-CONFIG_PKEY=m
-CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
CONFIG_CRC7=m
@@ -782,14 +796,13 @@ CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_HEADERS_INSTALL=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_SLUB_DEBUG_ON=y
CONFIG_PAGE_OWNER=y
CONFIG_DEBUG_RODATA_TEST=y
CONFIG_DEBUG_WX=y
@@ -801,28 +814,25 @@ CONFIG_DEBUG_OBJECTS_TIMERS=y
CONFIG_DEBUG_OBJECTS_WORK=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
-CONFIG_SLUB_DEBUG_ON=y
-CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_VMACACHE=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_KFENCE=y
+CONFIG_KFENCE_DEFERRABLE=y
CONFIG_KFENCE_STATIC_KEYS=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
CONFIG_TEST_LOCKUP=m
-CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_IRQFLAGS=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 0056cab27372..4f9a98247442 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -38,8 +38,6 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_ZEC12=y
@@ -61,6 +59,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
# CONFIG_GCC_PLUGINS is not set
@@ -68,6 +67,7 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG_SHA256=y
@@ -87,21 +87,21 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC_STAT=y
+# CONFIG_COMPAT_BRK is not set
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=y
-CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -159,6 +159,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
CONFIG_NF_CONNTRACK_TIMESTAMP=y
@@ -177,7 +178,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -382,6 +382,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -391,6 +392,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -483,20 +485,22 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_ASIX is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CADENCE is not set
# CONFIG_NET_VENDOR_CAVIUM is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_CISCO is not set
# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DAVICOM is not set
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUNGIBLE is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MICROSOFT is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_LITEX is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
@@ -505,16 +509,18 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MICROSOFT is not set
# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_NETERION is not set
# CONFIG_NET_VENDOR_NETRONOME is not set
-# CONFIG_NET_VENDOR_NI is not set
# CONFIG_NET_VENDOR_NVIDIA is not set
# CONFIG_NET_VENDOR_OKI is not set
# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
# CONFIG_NET_VENDOR_PENSANDO is not set
# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_QUALCOMM is not set
# CONFIG_NET_VENDOR_RDC is not set
# CONFIG_NET_VENDOR_REALTEK is not set
@@ -522,9 +528,9 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_ROCKER is not set
# CONFIG_NET_VENDOR_SAMSUNG is not set
# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SILAN is not set
# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SMSC is not set
# CONFIG_NET_VENDOR_SOCIONEXT is not set
# CONFIG_NET_VENDOR_STMICRO is not set
@@ -532,6 +538,7 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -556,6 +563,8 @@ CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
+# CONFIG_RANDOM_TRUST_CPU is not set
+# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
# CONFIG_PTP_1588_CLOCK is not set
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
@@ -575,6 +584,7 @@ CONFIG_MLX5_INFINIBAND=m
CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
+CONFIG_MLX5_VFIO_PCI=m
CONFIG_VFIO_MDEV=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
@@ -697,29 +707,9 @@ CONFIG_CRYPTO_ECDSA=m
CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
-CONFIG_CRYPTO_GCM=y
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_AEGIS128=m
-CONFIG_CRYPTO_SEQIV=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_ADIANTUM=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_BLAKE2S=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_ARIA=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
@@ -729,9 +719,31 @@ CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SM4_GENERIC=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ADIANTUM=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_HCTR2=m
+CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_SHA3=m
+CONFIG_CRYPTO_SM3_GENERIC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -742,24 +754,24 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
-CONFIG_ZCRYPT=m
-CONFIG_PKEY=m
-CONFIG_CRYPTO_PAES_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_CHACHA_S390=m
+CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
@@ -768,9 +780,7 @@ CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_GDB_SCRIPTS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config
new file mode 100644
index 000000000000..700a8b25c3ff
--- /dev/null
+++ b/arch/s390/configs/kasan.config
@@ -0,0 +1,3 @@
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_KASAN_VMALLOC=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index eed3b9acfa71..5fe9948be644 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,6 +1,6 @@
-# CONFIG_SWAP is not set
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
# CONFIG_TIME_NS is not set
@@ -8,13 +8,11 @@ CONFIG_HIGH_RES_TIMERS=y
# CONFIG_NET_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_MARCH_ZEC12=y
CONFIG_TUNE_ZEC12=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
-# CONFIG_ARCH_RANDOM is not set
# CONFIG_RELOCATABLE is not set
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
@@ -25,8 +23,11 @@ CONFIG_CRASH_DUMP=y
# CONFIG_S390_GUEST is not set
# CONFIG_SECCOMP is not set
# CONFIG_GCC_PLUGINS is not set
+# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SWAP is not set
+# CONFIG_COMPAT_BRK is not set
# CONFIG_COMPACTION is not set
# CONFIG_MIGRATION is not set
CONFIG_NET=y
@@ -34,6 +35,7 @@ CONFIG_NET=y
# CONFIG_PCPU_DEV_REFCNT is not set
# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_DCSSBLK is not set
# CONFIG_DASD is not set
@@ -51,10 +53,12 @@ CONFIG_ZFCP=y
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
+# CONFIG_S390_UV_UAPI is not set
# CONFIG_S390_TAPE is not set
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
+# CONFIG_RANDOM_TRUST_BOOTLOADER is not set
# CONFIG_HID is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
@@ -68,10 +72,9 @@ CONFIG_LSM="yama,loadpin,safesetid,integrity"
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
# CONFIG_SYMBOLIC_ERRNAME is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_FS=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
new file mode 100644
index 000000000000..06ee706b0d78
--- /dev/null
+++ b/arch/s390/crypto/Kconfig
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (s390)"
+
+config CRYPTO_CRC32_S390
+ tristate "CRC32c and CRC32"
+ depends on S390
+ select CRYPTO_HASH
+ select CRC32
+ help
+ CRC32c and CRC32 CRC algorithms
+
+ Architecture: s390
+
+ It is available with IBM z13 or later.
+
+config CRYPTO_SHA512_S390
+ tristate "Hash functions: SHA-384 and SHA-512"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z10.
+
+config CRYPTO_SHA1_S390
+ tristate "Hash functions: SHA-1"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash algorithm (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z990.
+
+config CRYPTO_SHA256_S390
+ tristate "Hash functions: SHA-224 and SHA-256"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+ Architecture: s390
+
+ It is available as of z9.
+
+config CRYPTO_SHA3_256_S390
+ tristate "Hash functions: SHA3-224 and SHA3-256"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
+
+ Architecture: s390
+
+ It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+ tristate "Hash functions: SHA3-384 and SHA3-512"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
+
+ Architecture: s390
+
+ It is available as of z14.
+
+config CRYPTO_GHASH_S390
+ tristate "Hash functions: GHASH"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ GCM GHASH hash function (NIST SP800-38D)
+
+ Architecture: s390
+
+ It is available as of z196.
+
+config CRYPTO_AES_S390
+ tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ help
+ Block cipher: AES cipher algorithms (FIPS 197)
+ AEAD cipher: AES with GCM
+ Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes
+
+ Architecture: s390
+
+ As of z9 the ECB and CBC modes are hardware accelerated
+ for 128 bit keys.
+
+ As of z10 the ECB and CBC modes are hardware accelerated
+ for all AES key sizes.
+
+ As of z196 the CTR mode is hardware accelerated for all AES
+ key sizes and XTS mode is hardware accelerated for 256 and
+ 512 bit keys.
+
+config CRYPTO_DES_S390
+ tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_DES
+ help
+ Block ciphers: DES (FIPS 46-2) cipher algorithm
+ Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm
+ Length-preserving ciphers: DES with ECB, CBC, and CTR modes
+ Length-preserving ciphers: Triple DES EDED with ECB, CBC, and CTR modes
+
+ Architecture: s390
+
+ As of z990 the ECB and CBC mode are hardware accelerated.
+ As of z196 the CTR mode is hardware accelerated.
+
+config CRYPTO_CHACHA_S390
+ tristate "Ciphers: ChaCha20"
+ depends on S390
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ help
+ Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
+
+ Architecture: s390
+
+ It is available as of z13.
+
+endmenu
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 12889d4652cc..1b1cc478fa94 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -11,9 +11,11 @@ obj-$(CONFIG_CRYPTO_SHA3_512_S390) += sha3_512_s390.o sha_common.o
obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
+obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
-obj-$(CONFIG_ARCH_RANDOM) += arch_random.o
+obj-y += arch_random.o
crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
+chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 54c7536f2482..526c3f40f6a2 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -701,7 +701,7 @@ static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw,
unsigned int nbytes)
{
gw->walk_bytes_remain -= nbytes;
- scatterwalk_unmap(&gw->walk);
+ scatterwalk_unmap(gw->walk_ptr);
scatterwalk_advance(&gw->walk, nbytes);
scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain);
gw->walk_ptr = NULL;
@@ -776,7 +776,7 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded)
goto out;
}
- scatterwalk_unmap(&gw->walk);
+ scatterwalk_unmap(gw->walk_ptr);
gw->walk_ptr = NULL;
gw->ptr = gw->buf;
@@ -1049,7 +1049,7 @@ out_err:
return ret;
}
-module_cpu_feature_match(MSA, aes_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, aes_s390_init);
module_exit(aes_s390_fini);
MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 56007c763902..1f2d40993c4d 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -4,232 +4,15 @@
*
* Copyright IBM Corp. 2017, 2020
* Author(s): Harald Freudenberger
- *
- * The s390_arch_random_generate() function may be called from random.c
- * in interrupt context. So this implementation does the best to be very
- * fast. There is a buffer of random data which is asynchronously checked
- * and filled by a workqueue thread.
- * If there are enough bytes in the buffer the s390_arch_random_generate()
- * just delivers these bytes. Otherwise false is returned until the
- * worker thread refills the buffer.
- * The worker fills the rng buffer by pulling fresh entropy from the
- * high quality (but slow) true hardware random generator. This entropy
- * is then spread over the buffer with an pseudo random generator PRNG.
- * As the arch_get_random_seed_long() fetches 8 bytes and the calling
- * function add_interrupt_randomness() counts this as 1 bit entropy the
- * distribution needs to make sure there is in fact 1 bit entropy contained
- * in 8 bytes of the buffer. The current values pull 32 byte entropy
- * and scatter this into a 2048 byte buffer. So 8 byte in the buffer
- * will contain 1 bit of entropy.
- * The worker thread is rescheduled based on the charge level of the
- * buffer but at least with 500 ms delay to avoid too much CPU consumption.
- * So the max. amount of rng data delivered via arch_get_random_seed is
- * limited to 4k bytes per second.
*/
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/random.h>
-#include <linux/slab.h>
#include <linux/static_key.h>
-#include <linux/workqueue.h>
-#include <linux/moduleparam.h>
#include <asm/cpacf.h>
DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
EXPORT_SYMBOL(s390_arch_random_counter);
-
-#define ARCH_REFILL_TICKS (HZ/2)
-#define ARCH_PRNG_SEED_SIZE 32
-#define ARCH_RNG_BUF_SIZE 2048
-
-static DEFINE_SPINLOCK(arch_rng_lock);
-static u8 *arch_rng_buf;
-static unsigned int arch_rng_buf_idx;
-
-static void arch_rng_refill_buffer(struct work_struct *);
-static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer);
-
-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes)
-{
- /* max hunk is ARCH_RNG_BUF_SIZE */
- if (nbytes > ARCH_RNG_BUF_SIZE)
- return false;
-
- /* lock rng buffer */
- if (!spin_trylock(&arch_rng_lock))
- return false;
-
- /* try to resolve the requested amount of bytes from the buffer */
- arch_rng_buf_idx -= nbytes;
- if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) {
- memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes);
- atomic64_add(nbytes, &s390_arch_random_counter);
- spin_unlock(&arch_rng_lock);
- return true;
- }
-
- /* not enough bytes in rng buffer, refill is done asynchronously */
- spin_unlock(&arch_rng_lock);
-
- return false;
-}
-EXPORT_SYMBOL(s390_arch_random_generate);
-
-static void arch_rng_refill_buffer(struct work_struct *unused)
-{
- unsigned int delay = ARCH_REFILL_TICKS;
-
- spin_lock(&arch_rng_lock);
- if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) {
- /* buffer is exhausted and needs refill */
- u8 seed[ARCH_PRNG_SEED_SIZE];
- u8 prng_wa[240];
- /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */
- cpacf_trng(NULL, 0, seed, sizeof(seed));
- /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */
- memset(prng_wa, 0, sizeof(prng_wa));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_wa, NULL, 0, seed, sizeof(seed));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
- &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0);
- arch_rng_buf_idx = ARCH_RNG_BUF_SIZE;
- }
- delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE;
- spin_unlock(&arch_rng_lock);
-
- /* kick next check */
- queue_delayed_work(system_long_wq, &arch_rng_work, delay);
-}
-
-/*
- * Here follows the implementation of s390_arch_get_random_long().
- *
- * The random longs to be pulled by arch_get_random_long() are
- * prepared in an 4K buffer which is filled from the NIST 800-90
- * compliant s390 drbg. By default the random long buffer is refilled
- * 256 times before the drbg itself needs a reseed. The reseed of the
- * drbg is done with 32 bytes fetched from the high quality (but slow)
- * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256
- * bits of entropy are spread over 256 * 4KB = 1MB serving 131072
- * arch_get_random_long() invocations before reseeded.
- *
- * How often the 4K random long buffer is refilled with the drbg
- * before the drbg is reseeded can be adjusted. There is a module
- * parameter 's390_arch_rnd_long_drbg_reseed' accessible via
- * /sys/module/arch_random/parameters/rndlong_drbg_reseed
- * or as kernel command line parameter
- * arch_random.rndlong_drbg_reseed=<value>
- * This parameter tells how often the drbg fills the 4K buffer before
- * it is re-seeded by fresh entropy from the trng.
- * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64
- * KB with 32 bytes of fresh entropy pulled from the trng. So a value
- * of 16 would result in 256 bits entropy per 64 KB.
- * A value of 256 results in 1MB of drbg output before a reseed of the
- * drbg is done. So this would spread the 256 bits of entropy among 1MB.
- * Setting this parameter to 0 forces the reseed to take place every
- * time the 4K buffer is depleted, so the entropy rises to 256 bits
- * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With
- * setting this parameter to negative values all this effort is
- * disabled, arch_get_random long() returns false and thus indicating
- * that the arch_get_random_long() feature is disabled at all.
- */
-
-static unsigned long rndlong_buf[512];
-static DEFINE_SPINLOCK(rndlong_lock);
-static int rndlong_buf_index;
-
-static int rndlong_drbg_reseed = 256;
-module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600);
-MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed");
-
-static inline void refill_rndlong_buf(void)
-{
- static u8 prng_ws[240];
- static int drbg_counter;
-
- if (--drbg_counter < 0) {
- /* need to re-seed the drbg */
- u8 seed[32];
-
- /* fetch seed from trng */
- cpacf_trng(NULL, 0, seed, sizeof(seed));
- /* seed drbg */
- memset(prng_ws, 0, sizeof(prng_ws));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_ws, NULL, 0, seed, sizeof(seed));
- /* re-init counter for drbg */
- drbg_counter = rndlong_drbg_reseed;
- }
-
- /* fill the arch_get_random_long buffer from drbg */
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws,
- (u8 *) rndlong_buf, sizeof(rndlong_buf),
- NULL, 0);
-}
-
-bool s390_arch_get_random_long(unsigned long *v)
-{
- bool rc = false;
- unsigned long flags;
-
- /* arch_get_random_long() disabled ? */
- if (rndlong_drbg_reseed < 0)
- return false;
-
- /* try to lock the random long lock */
- if (!spin_trylock_irqsave(&rndlong_lock, flags))
- return false;
-
- if (--rndlong_buf_index >= 0) {
- /* deliver next long value from the buffer */
- *v = rndlong_buf[rndlong_buf_index];
- rc = true;
- goto out;
- }
-
- /* buffer is depleted and needs refill */
- if (in_interrupt()) {
- /* delay refill in interrupt context to next caller */
- rndlong_buf_index = 0;
- goto out;
- }
-
- /* refill random long buffer */
- refill_rndlong_buf();
- rndlong_buf_index = ARRAY_SIZE(rndlong_buf);
-
- /* and provide one random long */
- *v = rndlong_buf[--rndlong_buf_index];
- rc = true;
-
-out:
- spin_unlock_irqrestore(&rndlong_lock, flags);
- return rc;
-}
-EXPORT_SYMBOL(s390_arch_get_random_long);
-
-static int __init s390_arch_random_init(void)
-{
- /* all the needed PRNO subfunctions available ? */
- if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) &&
- cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
-
- /* alloc arch random working buffer */
- arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL);
- if (!arch_rng_buf)
- return -ENOMEM;
-
- /* kick worker queue job to fill the random buffer */
- queue_delayed_work(system_long_wq,
- &arch_rng_work, ARCH_REFILL_TICKS);
-
- /* enable arch random to the outside world */
- static_branch_enable(&s390_arch_random_available);
- }
-
- return 0;
-}
-arch_initcall(s390_arch_random_init);
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
new file mode 100644
index 000000000000..7752bd314558
--- /dev/null
+++ b/arch/s390/crypto/chacha-glue.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#define KMSG_COMPONENT "chacha_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/algapi.h>
+#include <linux/cpufeature.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sizes.h>
+#include <asm/fpu/api.h>
+#include "chacha-s390.h"
+
+static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
+ unsigned int nbytes, const u32 *key,
+ u32 *counter)
+{
+ struct kernel_fpu vxstate;
+
+ kernel_fpu_begin(&vxstate, KERNEL_VXR);
+ chacha20_vx(dst, src, nbytes, key, counter);
+ kernel_fpu_end(&vxstate, KERNEL_VXR);
+
+ *counter += round_up(nbytes, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
+}
+
+static int chacha20_s390(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ u32 state[CHACHA_STATE_WORDS] __aligned(16);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int rc;
+
+ rc = skcipher_walk_virt(&walk, req, false);
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ while (walk.nbytes > 0) {
+ nbytes = walk.nbytes;
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ if (nbytes <= CHACHA_BLOCK_SIZE) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ chacha20_crypt_s390(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ &state[4], &state[12]);
+ }
+ rc = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+ return rc;
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ /* TODO: implement hchacha_block_arch() in assembly */
+ hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ /* s390 chacha20 implementation has 20 rounds hard-coded,
+ * it cannot handle a block of data or less, but otherwise
+ * it can handle data of arbitrary size
+ */
+ if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20)
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+ else
+ chacha20_crypt_s390(state, dst, src, bytes,
+ &state[4], &state[12]);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static struct skcipher_alg chacha_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-s390",
+ .base.cra_priority = 900,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha20_s390,
+ .decrypt = chacha20_s390,
+ }
+};
+
+static int __init chacha_mod_init(void)
+{
+ return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+ crypto_register_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs)) : 0;
+}
+
+static void __exit chacha_mod_fini(void)
+{
+ if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
+ crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
+}
+
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
+module_exit(chacha_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha20 stream cipher");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/s390/crypto/chacha-s390.S b/arch/s390/crypto/chacha-s390.S
new file mode 100644
index 000000000000..9b033622191c
--- /dev/null
+++ b/arch/s390/crypto/chacha-s390.S
@@ -0,0 +1,907 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Original implementation written by Andy Polyakov, @dot-asm.
+ * This is an adaptation of the original code for kernel use.
+ *
+ * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/vx-insn.h>
+
+#define SP %r15
+#define FRAME (16 * 8 + 4 * 8)
+
+.data
+.align 32
+
+.Lsigma:
+.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+.long 1,0,0,0
+.long 2,0,0,0
+.long 3,0,0,0
+.long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+
+.long 0,1,2,3
+.long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma
+.long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
+.long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
+.long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
+
+.previous
+
+ GEN_BR_THUNK %r14
+
+.text
+
+#############################################################################
+# void chacha20_vx_4x(u8 *out, counst u8 *inp, size_t len,
+# counst u32 *key, const u32 *counter)
+
+#define OUT %r2
+#define INP %r3
+#define LEN %r4
+#define KEY %r5
+#define COUNTER %r6
+
+#define BEPERM %v31
+#define CTR %v26
+
+#define K0 %v16
+#define K1 %v17
+#define K2 %v18
+#define K3 %v19
+
+#define XA0 %v0
+#define XA1 %v1
+#define XA2 %v2
+#define XA3 %v3
+
+#define XB0 %v4
+#define XB1 %v5
+#define XB2 %v6
+#define XB3 %v7
+
+#define XC0 %v8
+#define XC1 %v9
+#define XC2 %v10
+#define XC3 %v11
+
+#define XD0 %v12
+#define XD1 %v13
+#define XD2 %v14
+#define XD3 %v15
+
+#define XT0 %v27
+#define XT1 %v28
+#define XT2 %v29
+#define XT3 %v30
+
+ENTRY(chacha20_vx_4x)
+ stmg %r6,%r7,6*8(SP)
+
+ larl %r7,.Lsigma
+ lhi %r0,10
+ lhi %r1,0
+
+ VL K0,0,,%r7 # load sigma
+ VL K1,0,,KEY # load key
+ VL K2,16,,KEY
+ VL K3,0,,COUNTER # load counter
+
+ VL BEPERM,0x40,,%r7
+ VL CTR,0x50,,%r7
+
+ VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma
+
+ VREPF XB0,K1,0 # smash the key
+ VREPF XB1,K1,1
+ VREPF XB2,K1,2
+ VREPF XB3,K1,3
+
+ VREPF XD0,K3,0
+ VREPF XD1,K3,1
+ VREPF XD2,K3,2
+ VREPF XD3,K3,3
+ VAF XD0,XD0,CTR
+
+ VREPF XC0,K2,0
+ VREPF XC1,K2,1
+ VREPF XC2,K2,2
+ VREPF XC3,K2,3
+
+.Loop_4x:
+ VAF XA0,XA0,XB0
+ VX XD0,XD0,XA0
+ VERLLF XD0,XD0,16
+
+ VAF XA1,XA1,XB1
+ VX XD1,XD1,XA1
+ VERLLF XD1,XD1,16
+
+ VAF XA2,XA2,XB2
+ VX XD2,XD2,XA2
+ VERLLF XD2,XD2,16
+
+ VAF XA3,XA3,XB3
+ VX XD3,XD3,XA3
+ VERLLF XD3,XD3,16
+
+ VAF XC0,XC0,XD0
+ VX XB0,XB0,XC0
+ VERLLF XB0,XB0,12
+
+ VAF XC1,XC1,XD1
+ VX XB1,XB1,XC1
+ VERLLF XB1,XB1,12
+
+ VAF XC2,XC2,XD2
+ VX XB2,XB2,XC2
+ VERLLF XB2,XB2,12
+
+ VAF XC3,XC3,XD3
+ VX XB3,XB3,XC3
+ VERLLF XB3,XB3,12
+
+ VAF XA0,XA0,XB0
+ VX XD0,XD0,XA0
+ VERLLF XD0,XD0,8
+
+ VAF XA1,XA1,XB1
+ VX XD1,XD1,XA1
+ VERLLF XD1,XD1,8
+
+ VAF XA2,XA2,XB2
+ VX XD2,XD2,XA2
+ VERLLF XD2,XD2,8
+
+ VAF XA3,XA3,XB3
+ VX XD3,XD3,XA3
+ VERLLF XD3,XD3,8
+
+ VAF XC0,XC0,XD0
+ VX XB0,XB0,XC0
+ VERLLF XB0,XB0,7
+
+ VAF XC1,XC1,XD1
+ VX XB1,XB1,XC1
+ VERLLF XB1,XB1,7
+
+ VAF XC2,XC2,XD2
+ VX XB2,XB2,XC2
+ VERLLF XB2,XB2,7
+
+ VAF XC3,XC3,XD3
+ VX XB3,XB3,XC3
+ VERLLF XB3,XB3,7
+
+ VAF XA0,XA0,XB1
+ VX XD3,XD3,XA0
+ VERLLF XD3,XD3,16
+
+ VAF XA1,XA1,XB2
+ VX XD0,XD0,XA1
+ VERLLF XD0,XD0,16
+
+ VAF XA2,XA2,XB3
+ VX XD1,XD1,XA2
+ VERLLF XD1,XD1,16
+
+ VAF XA3,XA3,XB0
+ VX XD2,XD2,XA3
+ VERLLF XD2,XD2,16
+
+ VAF XC2,XC2,XD3
+ VX XB1,XB1,XC2
+ VERLLF XB1,XB1,12
+
+ VAF XC3,XC3,XD0
+ VX XB2,XB2,XC3
+ VERLLF XB2,XB2,12
+
+ VAF XC0,XC0,XD1
+ VX XB3,XB3,XC0
+ VERLLF XB3,XB3,12
+
+ VAF XC1,XC1,XD2
+ VX XB0,XB0,XC1
+ VERLLF XB0,XB0,12
+
+ VAF XA0,XA0,XB1
+ VX XD3,XD3,XA0
+ VERLLF XD3,XD3,8
+
+ VAF XA1,XA1,XB2
+ VX XD0,XD0,XA1
+ VERLLF XD0,XD0,8
+
+ VAF XA2,XA2,XB3
+ VX XD1,XD1,XA2
+ VERLLF XD1,XD1,8
+
+ VAF XA3,XA3,XB0
+ VX XD2,XD2,XA3
+ VERLLF XD2,XD2,8
+
+ VAF XC2,XC2,XD3
+ VX XB1,XB1,XC2
+ VERLLF XB1,XB1,7
+
+ VAF XC3,XC3,XD0
+ VX XB2,XB2,XC3
+ VERLLF XB2,XB2,7
+
+ VAF XC0,XC0,XD1
+ VX XB3,XB3,XC0
+ VERLLF XB3,XB3,7
+
+ VAF XC1,XC1,XD2
+ VX XB0,XB0,XC1
+ VERLLF XB0,XB0,7
+ brct %r0,.Loop_4x
+
+ VAF XD0,XD0,CTR
+
+ VMRHF XT0,XA0,XA1 # transpose data
+ VMRHF XT1,XA2,XA3
+ VMRLF XT2,XA0,XA1
+ VMRLF XT3,XA2,XA3
+ VPDI XA0,XT0,XT1,0b0000
+ VPDI XA1,XT0,XT1,0b0101
+ VPDI XA2,XT2,XT3,0b0000
+ VPDI XA3,XT2,XT3,0b0101
+
+ VMRHF XT0,XB0,XB1
+ VMRHF XT1,XB2,XB3
+ VMRLF XT2,XB0,XB1
+ VMRLF XT3,XB2,XB3
+ VPDI XB0,XT0,XT1,0b0000
+ VPDI XB1,XT0,XT1,0b0101
+ VPDI XB2,XT2,XT3,0b0000
+ VPDI XB3,XT2,XT3,0b0101
+
+ VMRHF XT0,XC0,XC1
+ VMRHF XT1,XC2,XC3
+ VMRLF XT2,XC0,XC1
+ VMRLF XT3,XC2,XC3
+ VPDI XC0,XT0,XT1,0b0000
+ VPDI XC1,XT0,XT1,0b0101
+ VPDI XC2,XT2,XT3,0b0000
+ VPDI XC3,XT2,XT3,0b0101
+
+ VMRHF XT0,XD0,XD1
+ VMRHF XT1,XD2,XD3
+ VMRLF XT2,XD0,XD1
+ VMRLF XT3,XD2,XD3
+ VPDI XD0,XT0,XT1,0b0000
+ VPDI XD1,XT0,XT1,0b0101
+ VPDI XD2,XT2,XT3,0b0000
+ VPDI XD3,XT2,XT3,0b0101
+
+ VAF XA0,XA0,K0
+ VAF XB0,XB0,K1
+ VAF XC0,XC0,K2
+ VAF XD0,XD0,K3
+
+ VPERM XA0,XA0,XA0,BEPERM
+ VPERM XB0,XB0,XB0,BEPERM
+ VPERM XC0,XC0,XC0,BEPERM
+ VPERM XD0,XD0,XD0,BEPERM
+
+ VLM XT0,XT3,0,INP,0
+
+ VX XT0,XT0,XA0
+ VX XT1,XT1,XB0
+ VX XT2,XT2,XC0
+ VX XT3,XT3,XD0
+
+ VSTM XT0,XT3,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+
+ VAF XA0,XA1,K0
+ VAF XB0,XB1,K1
+ VAF XC0,XC1,K2
+ VAF XD0,XD1,K3
+
+ VPERM XA0,XA0,XA0,BEPERM
+ VPERM XB0,XB0,XB0,BEPERM
+ VPERM XC0,XC0,XC0,BEPERM
+ VPERM XD0,XD0,XD0,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_4x
+
+ VLM XT0,XT3,0,INP,0
+
+ VX XT0,XT0,XA0
+ VX XT1,XT1,XB0
+ VX XT2,XT2,XC0
+ VX XT3,XT3,XD0
+
+ VSTM XT0,XT3,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_4x
+
+ VAF XA0,XA2,K0
+ VAF XB0,XB2,K1
+ VAF XC0,XC2,K2
+ VAF XD0,XD2,K3
+
+ VPERM XA0,XA0,XA0,BEPERM
+ VPERM XB0,XB0,XB0,BEPERM
+ VPERM XC0,XC0,XC0,BEPERM
+ VPERM XD0,XD0,XD0,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_4x
+
+ VLM XT0,XT3,0,INP,0
+
+ VX XT0,XT0,XA0
+ VX XT1,XT1,XB0
+ VX XT2,XT2,XC0
+ VX XT3,XT3,XD0
+
+ VSTM XT0,XT3,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_4x
+
+ VAF XA0,XA3,K0
+ VAF XB0,XB3,K1
+ VAF XC0,XC3,K2
+ VAF XD0,XD3,K3
+
+ VPERM XA0,XA0,XA0,BEPERM
+ VPERM XB0,XB0,XB0,BEPERM
+ VPERM XC0,XC0,XC0,BEPERM
+ VPERM XD0,XD0,XD0,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_4x
+
+ VLM XT0,XT3,0,INP,0
+
+ VX XT0,XT0,XA0
+ VX XT1,XT1,XB0
+ VX XT2,XT2,XC0
+ VX XT3,XT3,XD0
+
+ VSTM XT0,XT3,0,OUT,0
+
+.Ldone_4x:
+ lmg %r6,%r7,6*8(SP)
+ BR_EX %r14
+
+.Ltail_4x:
+ VLR XT0,XC0
+ VLR XT1,XD0
+
+ VST XA0,8*8+0x00,,SP
+ VST XB0,8*8+0x10,,SP
+ VST XT0,8*8+0x20,,SP
+ VST XT1,8*8+0x30,,SP
+
+ lghi %r1,0
+
+.Loop_tail_4x:
+ llgc %r5,0(%r1,INP)
+ llgc %r6,8*8(%r1,SP)
+ xr %r6,%r5
+ stc %r6,0(%r1,OUT)
+ la %r1,1(%r1)
+ brct LEN,.Loop_tail_4x
+
+ lmg %r6,%r7,6*8(SP)
+ BR_EX %r14
+ENDPROC(chacha20_vx_4x)
+
+#undef OUT
+#undef INP
+#undef LEN
+#undef KEY
+#undef COUNTER
+
+#undef BEPERM
+
+#undef K0
+#undef K1
+#undef K2
+#undef K3
+
+
+#############################################################################
+# void chacha20_vx(u8 *out, counst u8 *inp, size_t len,
+# counst u32 *key, const u32 *counter)
+
+#define OUT %r2
+#define INP %r3
+#define LEN %r4
+#define KEY %r5
+#define COUNTER %r6
+
+#define BEPERM %v31
+
+#define K0 %v27
+#define K1 %v24
+#define K2 %v25
+#define K3 %v26
+
+#define A0 %v0
+#define B0 %v1
+#define C0 %v2
+#define D0 %v3
+
+#define A1 %v4
+#define B1 %v5
+#define C1 %v6
+#define D1 %v7
+
+#define A2 %v8
+#define B2 %v9
+#define C2 %v10
+#define D2 %v11
+
+#define A3 %v12
+#define B3 %v13
+#define C3 %v14
+#define D3 %v15
+
+#define A4 %v16
+#define B4 %v17
+#define C4 %v18
+#define D4 %v19
+
+#define A5 %v20
+#define B5 %v21
+#define C5 %v22
+#define D5 %v23
+
+#define T0 %v27
+#define T1 %v28
+#define T2 %v29
+#define T3 %v30
+
+ENTRY(chacha20_vx)
+ clgfi LEN,256
+ jle chacha20_vx_4x
+ stmg %r6,%r7,6*8(SP)
+
+ lghi %r1,-FRAME
+ lgr %r0,SP
+ la SP,0(%r1,SP)
+ stg %r0,0(SP) # back-chain
+
+ larl %r7,.Lsigma
+ lhi %r0,10
+
+ VLM K1,K2,0,KEY,0 # load key
+ VL K3,0,,COUNTER # load counter
+
+ VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ...
+
+.Loop_outer_vx:
+ VLR A0,K0
+ VLR B0,K1
+ VLR A1,K0
+ VLR B1,K1
+ VLR A2,K0
+ VLR B2,K1
+ VLR A3,K0
+ VLR B3,K1
+ VLR A4,K0
+ VLR B4,K1
+ VLR A5,K0
+ VLR B5,K1
+
+ VLR D0,K3
+ VAF D1,K3,T1 # K[3]+1
+ VAF D2,K3,T2 # K[3]+2
+ VAF D3,K3,T3 # K[3]+3
+ VAF D4,D2,T2 # K[3]+4
+ VAF D5,D2,T3 # K[3]+5
+
+ VLR C0,K2
+ VLR C1,K2
+ VLR C2,K2
+ VLR C3,K2
+ VLR C4,K2
+ VLR C5,K2
+
+ VLR T1,D1
+ VLR T2,D2
+ VLR T3,D3
+
+.Loop_vx:
+ VAF A0,A0,B0
+ VAF A1,A1,B1
+ VAF A2,A2,B2
+ VAF A3,A3,B3
+ VAF A4,A4,B4
+ VAF A5,A5,B5
+ VX D0,D0,A0
+ VX D1,D1,A1
+ VX D2,D2,A2
+ VX D3,D3,A3
+ VX D4,D4,A4
+ VX D5,D5,A5
+ VERLLF D0,D0,16
+ VERLLF D1,D1,16
+ VERLLF D2,D2,16
+ VERLLF D3,D3,16
+ VERLLF D4,D4,16
+ VERLLF D5,D5,16
+
+ VAF C0,C0,D0
+ VAF C1,C1,D1
+ VAF C2,C2,D2
+ VAF C3,C3,D3
+ VAF C4,C4,D4
+ VAF C5,C5,D5
+ VX B0,B0,C0
+ VX B1,B1,C1
+ VX B2,B2,C2
+ VX B3,B3,C3
+ VX B4,B4,C4
+ VX B5,B5,C5
+ VERLLF B0,B0,12
+ VERLLF B1,B1,12
+ VERLLF B2,B2,12
+ VERLLF B3,B3,12
+ VERLLF B4,B4,12
+ VERLLF B5,B5,12
+
+ VAF A0,A0,B0
+ VAF A1,A1,B1
+ VAF A2,A2,B2
+ VAF A3,A3,B3
+ VAF A4,A4,B4
+ VAF A5,A5,B5
+ VX D0,D0,A0
+ VX D1,D1,A1
+ VX D2,D2,A2
+ VX D3,D3,A3
+ VX D4,D4,A4
+ VX D5,D5,A5
+ VERLLF D0,D0,8
+ VERLLF D1,D1,8
+ VERLLF D2,D2,8
+ VERLLF D3,D3,8
+ VERLLF D4,D4,8
+ VERLLF D5,D5,8
+
+ VAF C0,C0,D0
+ VAF C1,C1,D1
+ VAF C2,C2,D2
+ VAF C3,C3,D3
+ VAF C4,C4,D4
+ VAF C5,C5,D5
+ VX B0,B0,C0
+ VX B1,B1,C1
+ VX B2,B2,C2
+ VX B3,B3,C3
+ VX B4,B4,C4
+ VX B5,B5,C5
+ VERLLF B0,B0,7
+ VERLLF B1,B1,7
+ VERLLF B2,B2,7
+ VERLLF B3,B3,7
+ VERLLF B4,B4,7
+ VERLLF B5,B5,7
+
+ VSLDB C0,C0,C0,8
+ VSLDB C1,C1,C1,8
+ VSLDB C2,C2,C2,8
+ VSLDB C3,C3,C3,8
+ VSLDB C4,C4,C4,8
+ VSLDB C5,C5,C5,8
+ VSLDB B0,B0,B0,4
+ VSLDB B1,B1,B1,4
+ VSLDB B2,B2,B2,4
+ VSLDB B3,B3,B3,4
+ VSLDB B4,B4,B4,4
+ VSLDB B5,B5,B5,4
+ VSLDB D0,D0,D0,12
+ VSLDB D1,D1,D1,12
+ VSLDB D2,D2,D2,12
+ VSLDB D3,D3,D3,12
+ VSLDB D4,D4,D4,12
+ VSLDB D5,D5,D5,12
+
+ VAF A0,A0,B0
+ VAF A1,A1,B1
+ VAF A2,A2,B2
+ VAF A3,A3,B3
+ VAF A4,A4,B4
+ VAF A5,A5,B5
+ VX D0,D0,A0
+ VX D1,D1,A1
+ VX D2,D2,A2
+ VX D3,D3,A3
+ VX D4,D4,A4
+ VX D5,D5,A5
+ VERLLF D0,D0,16
+ VERLLF D1,D1,16
+ VERLLF D2,D2,16
+ VERLLF D3,D3,16
+ VERLLF D4,D4,16
+ VERLLF D5,D5,16
+
+ VAF C0,C0,D0
+ VAF C1,C1,D1
+ VAF C2,C2,D2
+ VAF C3,C3,D3
+ VAF C4,C4,D4
+ VAF C5,C5,D5
+ VX B0,B0,C0
+ VX B1,B1,C1
+ VX B2,B2,C2
+ VX B3,B3,C3
+ VX B4,B4,C4
+ VX B5,B5,C5
+ VERLLF B0,B0,12
+ VERLLF B1,B1,12
+ VERLLF B2,B2,12
+ VERLLF B3,B3,12
+ VERLLF B4,B4,12
+ VERLLF B5,B5,12
+
+ VAF A0,A0,B0
+ VAF A1,A1,B1
+ VAF A2,A2,B2
+ VAF A3,A3,B3
+ VAF A4,A4,B4
+ VAF A5,A5,B5
+ VX D0,D0,A0
+ VX D1,D1,A1
+ VX D2,D2,A2
+ VX D3,D3,A3
+ VX D4,D4,A4
+ VX D5,D5,A5
+ VERLLF D0,D0,8
+ VERLLF D1,D1,8
+ VERLLF D2,D2,8
+ VERLLF D3,D3,8
+ VERLLF D4,D4,8
+ VERLLF D5,D5,8
+
+ VAF C0,C0,D0
+ VAF C1,C1,D1
+ VAF C2,C2,D2
+ VAF C3,C3,D3
+ VAF C4,C4,D4
+ VAF C5,C5,D5
+ VX B0,B0,C0
+ VX B1,B1,C1
+ VX B2,B2,C2
+ VX B3,B3,C3
+ VX B4,B4,C4
+ VX B5,B5,C5
+ VERLLF B0,B0,7
+ VERLLF B1,B1,7
+ VERLLF B2,B2,7
+ VERLLF B3,B3,7
+ VERLLF B4,B4,7
+ VERLLF B5,B5,7
+
+ VSLDB C0,C0,C0,8
+ VSLDB C1,C1,C1,8
+ VSLDB C2,C2,C2,8
+ VSLDB C3,C3,C3,8
+ VSLDB C4,C4,C4,8
+ VSLDB C5,C5,C5,8
+ VSLDB B0,B0,B0,12
+ VSLDB B1,B1,B1,12
+ VSLDB B2,B2,B2,12
+ VSLDB B3,B3,B3,12
+ VSLDB B4,B4,B4,12
+ VSLDB B5,B5,B5,12
+ VSLDB D0,D0,D0,4
+ VSLDB D1,D1,D1,4
+ VSLDB D2,D2,D2,4
+ VSLDB D3,D3,D3,4
+ VSLDB D4,D4,D4,4
+ VSLDB D5,D5,D5,4
+ brct %r0,.Loop_vx
+
+ VAF A0,A0,K0
+ VAF B0,B0,K1
+ VAF C0,C0,K2
+ VAF D0,D0,K3
+ VAF A1,A1,K0
+ VAF D1,D1,T1 # +K[3]+1
+
+ VPERM A0,A0,A0,BEPERM
+ VPERM B0,B0,B0,BEPERM
+ VPERM C0,C0,C0,BEPERM
+ VPERM D0,D0,D0,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_vx
+
+ VAF D2,D2,T2 # +K[3]+2
+ VAF D3,D3,T3 # +K[3]+3
+ VLM T0,T3,0,INP,0
+
+ VX A0,A0,T0
+ VX B0,B0,T1
+ VX C0,C0,T2
+ VX D0,D0,T3
+
+ VLM K0,T3,0,%r7,4 # re-load sigma and increments
+
+ VSTM A0,D0,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_vx
+
+ VAF B1,B1,K1
+ VAF C1,C1,K2
+
+ VPERM A0,A1,A1,BEPERM
+ VPERM B0,B1,B1,BEPERM
+ VPERM C0,C1,C1,BEPERM
+ VPERM D0,D1,D1,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_vx
+
+ VLM A1,D1,0,INP,0
+
+ VX A0,A0,A1
+ VX B0,B0,B1
+ VX C0,C0,C1
+ VX D0,D0,D1
+
+ VSTM A0,D0,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_vx
+
+ VAF A2,A2,K0
+ VAF B2,B2,K1
+ VAF C2,C2,K2
+
+ VPERM A0,A2,A2,BEPERM
+ VPERM B0,B2,B2,BEPERM
+ VPERM C0,C2,C2,BEPERM
+ VPERM D0,D2,D2,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_vx
+
+ VLM A1,D1,0,INP,0
+
+ VX A0,A0,A1
+ VX B0,B0,B1
+ VX C0,C0,C1
+ VX D0,D0,D1
+
+ VSTM A0,D0,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_vx
+
+ VAF A3,A3,K0
+ VAF B3,B3,K1
+ VAF C3,C3,K2
+ VAF D2,K3,T3 # K[3]+3
+
+ VPERM A0,A3,A3,BEPERM
+ VPERM B0,B3,B3,BEPERM
+ VPERM C0,C3,C3,BEPERM
+ VPERM D0,D3,D3,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_vx
+
+ VAF D3,D2,T1 # K[3]+4
+ VLM A1,D1,0,INP,0
+
+ VX A0,A0,A1
+ VX B0,B0,B1
+ VX C0,C0,C1
+ VX D0,D0,D1
+
+ VSTM A0,D0,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_vx
+
+ VAF A4,A4,K0
+ VAF B4,B4,K1
+ VAF C4,C4,K2
+ VAF D4,D4,D3 # +K[3]+4
+ VAF D3,D3,T1 # K[3]+5
+ VAF K3,D2,T3 # K[3]+=6
+
+ VPERM A0,A4,A4,BEPERM
+ VPERM B0,B4,B4,BEPERM
+ VPERM C0,C4,C4,BEPERM
+ VPERM D0,D4,D4,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_vx
+
+ VLM A1,D1,0,INP,0
+
+ VX A0,A0,A1
+ VX B0,B0,B1
+ VX C0,C0,C1
+ VX D0,D0,D1
+
+ VSTM A0,D0,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ aghi LEN,-0x40
+ je .Ldone_vx
+
+ VAF A5,A5,K0
+ VAF B5,B5,K1
+ VAF C5,C5,K2
+ VAF D5,D5,D3 # +K[3]+5
+
+ VPERM A0,A5,A5,BEPERM
+ VPERM B0,B5,B5,BEPERM
+ VPERM C0,C5,C5,BEPERM
+ VPERM D0,D5,D5,BEPERM
+
+ clgfi LEN,0x40
+ jl .Ltail_vx
+
+ VLM A1,D1,0,INP,0
+
+ VX A0,A0,A1
+ VX B0,B0,B1
+ VX C0,C0,C1
+ VX D0,D0,D1
+
+ VSTM A0,D0,0,OUT,0
+
+ la INP,0x40(INP)
+ la OUT,0x40(OUT)
+ lhi %r0,10
+ aghi LEN,-0x40
+ jne .Loop_outer_vx
+
+.Ldone_vx:
+ lmg %r6,%r7,FRAME+6*8(SP)
+ la SP,FRAME(SP)
+ BR_EX %r14
+
+.Ltail_vx:
+ VSTM A0,D0,8*8,SP,3
+ lghi %r1,0
+
+.Loop_tail_vx:
+ llgc %r5,0(%r1,INP)
+ llgc %r6,8*8(%r1,SP)
+ xr %r6,%r5
+ stc %r6,0(%r1,OUT)
+ la %r1,1(%r1)
+ brct LEN,.Loop_tail_vx
+
+ lmg %r6,%r7,FRAME+6*8(SP)
+ la SP,FRAME(SP)
+ BR_EX %r14
+ENDPROC(chacha20_vx)
+
+.previous
diff --git a/arch/s390/crypto/chacha-s390.h b/arch/s390/crypto/chacha-s390.h
new file mode 100644
index 000000000000..733744ce30f5
--- /dev/null
+++ b/arch/s390/crypto/chacha-s390.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 ChaCha stream cipher.
+ *
+ * Copyright IBM Corp. 2021
+ */
+
+#ifndef _CHACHA_S390_H
+#define _CHACHA_S390_H
+
+void chacha20_vx(u8 *out, const u8 *inp, size_t len, const u32 *key,
+ const u32 *counter);
+
+#endif /* _CHACHA_S390_H */
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index fafecad20752..017143e9cef7 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -298,7 +298,7 @@ static void __exit crc_vx_mod_exit(void)
crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
}
-module_cpu_feature_match(VXRS, crc_vx_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
module_exit(crc_vx_mod_exit);
MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index bfbafd35bcbd..8e75b83a5ddc 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -194,7 +194,7 @@ static struct skcipher_alg cbc_des_alg = {
* same as DES. Implementers MUST reject keys that exhibit this
* property.
*
- * In fips mode additinally check for all 3 keys are unique.
+ * In fips mode additionally check for all 3 keys are unique.
*
*/
static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
@@ -492,7 +492,7 @@ out_err:
return ret;
}
-module_cpu_feature_match(MSA, des_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, des_s390_init);
module_exit(des_s390_exit);
MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 6b07a2f1ce8a..0800a2a5799f 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -145,7 +145,7 @@ static void __exit ghash_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-module_cpu_feature_match(MSA, ghash_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, ghash_mod_init);
module_exit(ghash_mod_exit);
MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 234d791ca59d..a077087bc6cc 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -528,7 +528,7 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
/* give mutex free before calling schedule() */
mutex_unlock(&prng_data->mutex);
schedule();
- /* occopy mutex again */
+ /* occupy mutex again */
if (mutex_lock_interruptible(&prng_data->mutex)) {
if (ret == 0)
ret = -ERESTARTSYS;
@@ -907,5 +907,5 @@ static void __exit prng_exit(void)
}
}
-module_cpu_feature_match(MSA, prng_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, prng_init);
module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index a3fabf310a38..bc3a22704e09 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -95,7 +95,7 @@ static void __exit sha1_s390_fini(void)
crypto_unregister_shash(&alg);
}
-module_cpu_feature_match(MSA, sha1_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
module_exit(sha1_s390_fini);
MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 24983f175676..6f1ccdf93d3e 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -134,7 +134,7 @@ static void __exit sha256_s390_fini(void)
crypto_unregister_shash(&sha256_alg);
}
-module_cpu_feature_match(MSA, sha256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
module_exit(sha256_s390_fini);
MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index 30ac49b635bf..e1350e033a32 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -137,7 +137,7 @@ static void __exit sha3_256_s390_fini(void)
crypto_unregister_shash(&sha3_256_alg);
}
-module_cpu_feature_match(MSA, sha3_256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
module_exit(sha3_256_s390_fini);
MODULE_ALIAS_CRYPTO("sha3-256");
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index e70d50f7620f..06c142ed9bb1 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -147,7 +147,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha3_384_alg);
}
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 29a6bd404c59..04f11c407763 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -22,14 +22,14 @@ static int sha512_init(struct shash_desc *desc)
{
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- *(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL;
- *(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL;
- *(__u64 *)&ctx->state[4] = 0x3c6ef372fe94f82bULL;
- *(__u64 *)&ctx->state[6] = 0xa54ff53a5f1d36f1ULL;
- *(__u64 *)&ctx->state[8] = 0x510e527fade682d1ULL;
- *(__u64 *)&ctx->state[10] = 0x9b05688c2b3e6c1fULL;
- *(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL;
- *(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL;
+ *(__u64 *)&ctx->state[0] = SHA512_H0;
+ *(__u64 *)&ctx->state[2] = SHA512_H1;
+ *(__u64 *)&ctx->state[4] = SHA512_H2;
+ *(__u64 *)&ctx->state[6] = SHA512_H3;
+ *(__u64 *)&ctx->state[8] = SHA512_H4;
+ *(__u64 *)&ctx->state[10] = SHA512_H5;
+ *(__u64 *)&ctx->state[12] = SHA512_H6;
+ *(__u64 *)&ctx->state[14] = SHA512_H7;
ctx->count = 0;
ctx->func = CPACF_KIMD_SHA_512;
@@ -87,14 +87,14 @@ static int sha384_init(struct shash_desc *desc)
{
struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
- *(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
- *(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL;
- *(__u64 *)&ctx->state[4] = 0x9159015a3070dd17ULL;
- *(__u64 *)&ctx->state[6] = 0x152fecd8f70e5939ULL;
- *(__u64 *)&ctx->state[8] = 0x67332667ffc00b31ULL;
- *(__u64 *)&ctx->state[10] = 0x8eb44a8768581511ULL;
- *(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL;
- *(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL;
+ *(__u64 *)&ctx->state[0] = SHA384_H0;
+ *(__u64 *)&ctx->state[2] = SHA384_H1;
+ *(__u64 *)&ctx->state[4] = SHA384_H2;
+ *(__u64 *)&ctx->state[6] = SHA384_H3;
+ *(__u64 *)&ctx->state[8] = SHA384_H4;
+ *(__u64 *)&ctx->state[10] = SHA384_H5;
+ *(__u64 *)&ctx->state[12] = SHA384_H6;
+ *(__u64 *)&ctx->state[14] = SHA384_H7;
ctx->count = 0;
ctx->func = CPACF_KIMD_SHA_512;
@@ -142,7 +142,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha384_alg);
}
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index f0bc4dc3e9bf..6511d15ace45 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -437,7 +437,7 @@ __init int hypfs_diag_init(void)
int rc;
if (diag204_probe()) {
- pr_err("The hardware system does not support hypfs\n");
+ pr_info("The hardware system does not support hypfs\n");
return -ENODATA;
}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 33f973ff9744..a3d881ca0a98 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
+#include <asm/extable.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/timex.h>
@@ -20,6 +21,7 @@
static char local_guest[] = " ";
static char all_guests[] = "* ";
+static char *all_groups = all_guests;
static char *guest_query;
struct diag2fc_data {
@@ -62,10 +64,11 @@ static int diag2fc(int size, char* query, void *addr)
memcpy(parm_list.userid, query, NAME_LEN);
ASCEBC(parm_list.userid, NAME_LEN);
- parm_list.addr = (unsigned long) addr ;
+ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+ ASCEBC(parm_list.aci_grp, NAME_LEN);
+ parm_list.addr = (unsigned long)addr;
parm_list.size = size;
parm_list.fmt = 0x02;
- memset(parm_list.aci_grp, 0x40, NAME_LEN);
rc = -1;
diag_stat_inc(DIAG_STAT_X2FC);
@@ -187,7 +190,7 @@ int hypfs_vm_create_files(struct dentry *root)
if (IS_ERR(data))
return PTR_ERR(data);
- /* Hpervisor Info */
+ /* Hypervisor Info */
dir = hypfs_mkdir(root, "hyp");
if (IS_ERR(dir)) {
rc = PTR_ERR(dir);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 5c97f48cea91..ee919bfc8186 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -496,9 +496,9 @@ fail_hypfs_sprp_exit:
hypfs_vm_exit();
fail_hypfs_diag_exit:
hypfs_diag_exit();
+ pr_err("Initialization of hypfs failed with rc=%i\n", rc);
fail_dbfs_exit:
hypfs_dbfs_exit();
- pr_err("Initialization of hypfs failed with rc=%i\n", rc);
return rc;
}
device_initcall(hypfs_init)
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
new file mode 100644
index 000000000000..4c61b14ee928
--- /dev/null
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ABS_LOWCORE_H
+#define _ASM_S390_ABS_LOWCORE_H
+
+#include <asm/lowcore.h>
+
+#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
+
+extern unsigned long __abs_lowcore;
+extern bool abs_lowcore_mapped;
+
+struct lowcore *get_abs_lowcore(unsigned long *flags);
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
+void abs_lowcore_unmap(int cpu);
+
+#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 01936fdfaddb..e82e5626e139 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -12,10 +12,11 @@
#include <linux/bit_spinlock.h>
#include <linux/dma-mapping.h>
+#include <asm/tpi.h>
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
- void (*handler)(struct airq_struct *airq, bool floating);
+ void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
u8 lsi_mask; /* Local-Summary-Indicator mask */
u8 isc; /* Interrupt-subclass */
@@ -46,8 +47,10 @@ struct airq_iv {
#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
#define AIRQ_IV_DATA 8 /* Allocate the data array */
#define AIRQ_IV_CACHELINE 16 /* Cacheline alignment for the vector */
+#define AIRQ_IV_GUESTVEC 32 /* Vector is a pinned guest page */
-struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
+ unsigned long *vec);
void airq_iv_release(struct airq_iv *iv);
unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
index 955d620db23e..7db046596b93 100644
--- a/arch/s390/include/asm/alternative-asm.h
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -5,19 +5,6 @@
#ifdef __ASSEMBLY__
/*
- * Check the length of an instruction sequence. The length may not be larger
- * than 254 bytes and it has to be divisible by 2.
- */
-.macro alt_len_check start,end
- .if ( \end - \start ) > 254
- .error "cpu alternatives does not support instructions blocks > 254 bytes\n"
- .endif
- .if ( \end - \start ) % 2
- .error "cpu alternatives instructions length is odd\n"
- .endif
-.endm
-
-/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
@@ -28,60 +15,29 @@
.long \alt_start - .
.word \feature
.byte \orig_end - \orig_start
- .byte \alt_end - \alt_start
-.endm
-
-/*
- * Fill up @bytes with nops. The macro emits 6-byte nop instructions
- * for the bulk of the area, possibly followed by a 4-byte and/or
- * a 2-byte nop if the size of the area is not divisible by 6.
- */
-.macro alt_pad_fill bytes
- .fill ( \bytes ) / 6, 6, 0xc0040000
- .fill ( \bytes ) % 6 / 4, 4, 0x47000000
- .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700
-.endm
-
-/*
- * Fill up @bytes with nops. If the number of bytes is larger
- * than 6, emit a jg instruction to branch over all nops, then
- * fill an area of size (@bytes - 6) with nop instructions.
- */
-.macro alt_pad bytes
- .if ( \bytes > 0 )
- .if ( \bytes > 6 )
- jg . + \bytes
- alt_pad_fill \bytes - 6
- .else
- alt_pad_fill \bytes
- .endif
- .endif
+ .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+ .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
+ * @newinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, feature
.pushsection .altinstr_replacement,"ax"
770: \newinstr
771: .popsection
772: \oldinstr
-773: alt_len_check 770b, 771b
- alt_len_check 772b, 773b
- alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
-774: .pushsection .altinstructions,"a"
- alt_entry 772b, 774b, 770b, 771b, \feature
+773: .pushsection .altinstructions,"a"
+ alt_entry 772b, 773b, 770b, 771b, \feature
.popsection
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
+ * @newinstr.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
.pushsection .altinstr_replacement,"ax"
@@ -89,17 +45,9 @@
771: \newinstr2
772: .popsection
773: \oldinstr
-774: alt_len_check 770b, 771b
- alt_len_check 771b, 772b
- alt_len_check 773b, 774b
- .if ( 771b - 770b > 772b - 771b )
- alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
- .else
- alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
- .endif
-775: .pushsection .altinstructions,"a"
- alt_entry 773b, 775b, 770b, 771b,\feature1
- alt_entry 773b, 775b, 771b, 772b,\feature2
+774: .pushsection .altinstructions,"a"
+ alt_entry 773b, 774b, 770b, 771b,\feature1
+ alt_entry 773b, 774b, 771b, 772b,\feature2
.popsection
.endm
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index d3880ca764ee..904dd049f954 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -13,32 +13,25 @@ struct alt_instr {
s32 repl_offset; /* offset to replacement instruction */
u16 facility; /* facility bit set for replacement */
u8 instrlen; /* length of original instruction */
- u8 replacementlen; /* length of new instruction */
} __packed;
void apply_alternative_instructions(void);
void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
/*
- * |661: |662: |6620 |663:
- * +-----------+---------------------+
- * | oldinstr | oldinstr_padding |
- * | +----------+----------+
- * | | | |
- * | | >6 bytes |6/4/2 nops|
- * | |6 bytes jg----------->
- * +-----------+---------------------+
- * ^^ static padding ^^
+ * +---------------------------------+
+ * |661: |662:
+ * | oldinstr |
+ * +---------------------------------+
*
* .altinstr_replacement section
- * +---------------------+-----------+
+ * +---------------------------------+
* |6641: |6651:
* | alternative instr 1 |
- * +-----------+---------+- - - - - -+
- * |6642: |6652: |
- * | alternative instr 2 | padding
- * +---------------------+- - - - - -+
- * ^ runtime ^
+ * +---------------------------------+
+ * |6642: |6652:
+ * | alternative instr 2 |
+ * +---------------------------------+
*
* .altinstructions section
* +---------------------------------+
@@ -47,70 +40,31 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* +---------------------------------+
*/
-#define b_altinstr(num) "664"#num
-#define e_altinstr(num) "665"#num
-
-#define e_oldinstr_pad_end "663"
+#define b_altinstr(num) "664"#num
+#define e_altinstr(num) "665"#num
#define oldinstr_len "662b-661b"
-#define oldinstr_total_len e_oldinstr_pad_end"b-661b"
#define altinstr_len(num) e_altinstr(num)"b-"b_altinstr(num)"b"
-#define oldinstr_pad_len(num) \
- "-(((" altinstr_len(num) ")-(" oldinstr_len ")) > 0) * " \
- "((" altinstr_len(num) ")-(" oldinstr_len "))"
-
-#define INSTR_LEN_SANITY_CHECK(len) \
- ".if " len " > 254\n" \
- "\t.error \"cpu alternatives does not support instructions " \
- "blocks > 254 bytes\"\n" \
- ".endif\n" \
- ".if (" len ") %% 2\n" \
- "\t.error \"cpu alternatives instructions length is odd\"\n" \
- ".endif\n"
-
-#define OLDINSTR_PADDING(oldinstr, num) \
- ".if " oldinstr_pad_len(num) " > 6\n" \
- "\tjg " e_oldinstr_pad_end "f\n" \
- "6620:\n" \
- "\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \
- ".else\n" \
- "\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \
- "\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \
- "\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \
- ".endif\n"
-
-#define OLDINSTR(oldinstr, num) \
- "661:\n\t" oldinstr "\n662:\n" \
- OLDINSTR_PADDING(oldinstr, num) \
- e_oldinstr_pad_end ":\n" \
- INSTR_LEN_SANITY_CHECK(oldinstr_len)
-
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "661:\n\t" oldinstr "\n662:\n" \
- ".if " altinstr_len(num1) " < " altinstr_len(num2) "\n" \
- OLDINSTR_PADDING(oldinstr, num2) \
- ".else\n" \
- OLDINSTR_PADDING(oldinstr, num1) \
- ".endif\n" \
- e_oldinstr_pad_end ":\n" \
- INSTR_LEN_SANITY_CHECK(oldinstr_len)
+
+#define OLDINSTR(oldinstr) \
+ "661:\n\t" oldinstr "\n662:\n"
#define ALTINSTR_ENTRY(facility, num) \
"\t.long 661b - .\n" /* old instruction */ \
"\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
"\t.word " __stringify(facility) "\n" /* facility bit */ \
- "\t.byte " oldinstr_total_len "\n" /* source len */ \
- "\t.byte " altinstr_len(num) "\n" /* alt instruction len */
+ "\t.byte " oldinstr_len "\n" /* instruction len */ \
+ "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \
+ "\t.org . - (" altinstr_len(num) ") + (" oldinstr_len ")\n"
#define ALTINSTR_REPLACEMENT(altinstr, num) /* replacement */ \
- b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \
- INSTR_LEN_SANITY_CHECK(altinstr_len(num))
+ b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, altinstr, facility) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr, 1) \
".popsection\n" \
- OLDINSTR(oldinstr, 1) \
+ OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(facility, 1) \
".popsection\n"
@@ -120,7 +74,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
ALTINSTR_REPLACEMENT(altinstr1, 1) \
ALTINSTR_REPLACEMENT(altinstr2, 2) \
".popsection\n" \
- OLDINSTR_2(oldinstr, 1, 2) \
+ OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(facility1, 1) \
ALTINSTR_ENTRY(facility2, 2) \
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 3afbee21dc1f..f508f5025e38 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -12,6 +12,9 @@
#ifndef _ASM_S390_AP_H_
#define _ASM_S390_AP_H_
+#include <linux/io.h>
+#include <asm/asm-extable.h>
+
/**
* The ap_qid_t identifier of an ap queue.
* If the AP facilities test (APFT) facility is available,
@@ -57,11 +60,11 @@ static inline bool ap_instructions_available(void)
unsigned long reg1 = 0;
asm volatile(
- " lgr 0,%[reg0]\n" /* qid into gr0 */
- " lghi 1,0\n" /* 0 into gr1 */
- " lghi 2,0\n" /* 0 into gr2 */
- " .long 0xb2af0000\n" /* PQAP(TAPQ) */
- "0: la %[reg1],1\n" /* 1 into reg1 */
+ " lgr 0,%[reg0]\n" /* qid into gr0 */
+ " lghi 1,0\n" /* 0 into gr1 */
+ " lghi 2,0\n" /* 0 into gr2 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */
+ "0: la %[reg1],1\n" /* 1 into reg1 */
"1:\n"
EX_TABLE(0b, 1b)
: [reg1] "+&d" (reg1)
@@ -83,11 +86,11 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
unsigned long reg2;
asm volatile(
- " lgr 0,%[qid]\n" /* qid into gr0 */
- " lghi 2,0\n" /* 0 into gr2 */
- " .long 0xb2af0000\n" /* PQAP(TAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* gr2 into reg2 */
+ " lgr 0,%[qid]\n" /* qid into gr0 */
+ " lghi 2,0\n" /* 0 into gr2 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg2],2\n" /* gr2 into reg2 */
: [reg1] "=&d" (reg1), [reg2] "=&d" (reg2)
: [qid] "d" (qid)
: "cc", "0", "1", "2");
@@ -125,9 +128,9 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
struct ap_queue_status reg1;
asm volatile(
- " lgr 0,%[reg0]\n" /* qid arg into gr0 */
- " .long 0xb2af0000\n" /* PQAP(RAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr 0,%[reg0]\n" /* qid arg into gr0 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -146,9 +149,9 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
struct ap_queue_status reg1;
asm volatile(
- " lgr 0,%[reg0]\n" /* qid arg into gr0 */
- " .long 0xb2af0000\n" /* PQAP(ZAPQ) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr 0,%[reg0]\n" /* qid arg into gr0 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
: [reg1] "=&d" (reg1)
: [reg0] "d" (reg0)
: "cc", "0", "1");
@@ -187,10 +190,10 @@ static inline int ap_qci(struct ap_config_info *config)
struct ap_config_info *reg2 = config;
asm volatile(
- " lgr 0,%[reg0]\n" /* QCI fc into gr0 */
- " lgr 2,%[reg2]\n" /* ptr to config into gr2 */
- " .long 0xb2af0000\n" /* PQAP(QCI) */
- "0: la %[reg1],0\n" /* good case, QCI fc available */
+ " lgr 0,%[reg0]\n" /* QCI fc into gr0 */
+ " lgr 2,%[reg2]\n" /* ptr to config into gr2 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(QCI) */
+ "0: la %[reg1],0\n" /* good case, QCI fc available */
"1:\n"
EX_TABLE(0b, 1b)
: [reg1] "+&d" (reg1)
@@ -224,13 +227,13 @@ struct ap_qirq_ctrl {
* ap_aqic(): Control interruption for a specific AP.
* @qid: The AP queue number
* @qirqctrl: struct ap_qirq_ctrl (64 bit value)
- * @ind: The notification indicator byte
+ * @pa_ind: Physical address of the notification indicator byte
*
* Returns AP queue status.
*/
static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl,
- void *ind)
+ phys_addr_t pa_ind)
{
unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */
union {
@@ -238,16 +241,16 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl;
struct ap_queue_status status;
} reg1;
- void *reg2 = ind;
+ unsigned long reg2 = pa_ind;
reg1.qirqctrl = qirqctrl;
asm volatile(
- " lgr 0,%[reg0]\n" /* qid param into gr0 */
- " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
- " lgr 2,%[reg2]\n" /* ni addr into gr2 */
- " .long 0xb2af0000\n" /* PQAP(AQIC) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr 0,%[reg0]\n" /* qid param into gr0 */
+ " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */
+ " lgr 2,%[reg2]\n" /* ni addr into gr2 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
: [reg1] "+&d" (reg1)
: [reg0] "d" (reg0), [reg2] "d" (reg2)
: "cc", "0", "1", "2");
@@ -294,11 +297,11 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
reg1.value = apinfo->val;
asm volatile(
- " lgr 0,%[reg0]\n" /* qid param into gr0 */
- " lgr 1,%[reg1]\n" /* qact in info into gr1 */
- " .long 0xb2af0000\n" /* PQAP(QACT) */
- " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
- " lgr %[reg2],2\n" /* qact out info into reg2 */
+ " lgr 0,%[reg0]\n" /* qid param into gr0 */
+ " lgr 1,%[reg1]\n" /* qact in info into gr1 */
+ " .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */
+ " lgr %[reg1],1\n" /* gr1 (status) into reg1 */
+ " lgr %[reg2],2\n" /* qact out info into reg2 */
: [reg1] "+&d" (reg1), [reg2] "=&d" (reg2)
: [reg0] "d" (reg0)
: "cc", "0", "1", "2");
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index 5dc712fde3c7..1594049893e0 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -2,7 +2,7 @@
/*
* Kernel interface for the s390 arch_random_* functions
*
- * Copyright IBM Corp. 2017, 2020
+ * Copyright IBM Corp. 2017, 2022
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -11,44 +11,28 @@
#ifndef _ASM_S390_ARCHRANDOM_H
#define _ASM_S390_ARCHRANDOM_H
-#ifdef CONFIG_ARCH_RANDOM
-
#include <linux/static_key.h>
+#include <linux/preempt.h>
#include <linux/atomic.h>
+#include <asm/cpacf.h>
DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
extern atomic64_t s390_arch_random_counter;
-bool s390_arch_get_random_long(unsigned long *v);
-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
-
-static inline bool __must_check arch_get_random_long(unsigned long *v)
-{
- if (static_branch_likely(&s390_arch_random_available))
- return s390_arch_get_random_long(v);
- return false;
-}
-
-static inline bool __must_check arch_get_random_int(unsigned int *v)
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
- return false;
-}
-
-static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
-{
- if (static_branch_likely(&s390_arch_random_available)) {
- return s390_arch_random_generate((u8 *)v, sizeof(*v));
- }
- return false;
+ return 0;
}
-static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
{
- if (static_branch_likely(&s390_arch_random_available)) {
- return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, max_longs * sizeof(*v));
+ atomic64_add(max_longs * sizeof(*v), &s390_arch_random_counter);
+ return max_longs;
}
- return false;
+ return 0;
}
-#endif /* CONFIG_ARCH_RANDOM */
#endif /* _ASM_S390_ARCHRANDOM_H */
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
new file mode 100644
index 000000000000..b74f1070ddb2
--- /dev/null
+++ b/arch/s390/include/asm/asm-extable.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
+
+#include <linux/stringify.h>
+#include <linux/bits.h>
+#include <asm/asm-const.h>
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_FIXUP 1
+#define EX_TYPE_BPF 2
+#define EX_TYPE_UA_STORE 3
+#define EX_TYPE_UA_LOAD_MEM 4
+#define EX_TYPE_UA_LOAD_REG 5
+
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(3, 0)
+
+#define EX_DATA_REG_ADDR_SHIFT 4
+#define EX_DATA_REG_ADDR GENMASK(7, 4)
+
+#define EX_DATA_LEN_SHIFT 8
+#define EX_DATA_LEN GENMASK(11, 8)
+
+#define __EX_TABLE(_section, _fault, _target, _type) \
+ stringify_in_c(.section _section,"a";) \
+ stringify_in_c(.align 4;) \
+ stringify_in_c(.long (_fault) - .;) \
+ stringify_in_c(.long (_target) - .;) \
+ stringify_in_c(.short (_type);) \
+ stringify_in_c(.short 0;) \
+ stringify_in_c(.previous)
+
+#define __EX_TABLE_UA(_section, _fault, _target, _type, _regerr, _regaddr, _len)\
+ stringify_in_c(.section _section,"a";) \
+ stringify_in_c(.align 4;) \
+ stringify_in_c(.long (_fault) - .;) \
+ stringify_in_c(.long (_target) - .;) \
+ stringify_in_c(.short (_type);) \
+ stringify_in_c(.macro extable_reg regerr, regaddr;) \
+ stringify_in_c(.set .Lfound, 0;) \
+ stringify_in_c(.set .Lcurr, 0;) \
+ stringify_in_c(.irp rs,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15;) \
+ stringify_in_c( .ifc "\regerr", "%%r\rs";) \
+ stringify_in_c( .set .Lfound, 1;) \
+ stringify_in_c( .set .Lregerr, .Lcurr;) \
+ stringify_in_c( .endif;) \
+ stringify_in_c( .set .Lcurr, .Lcurr+1;) \
+ stringify_in_c(.endr;) \
+ stringify_in_c(.ifne (.Lfound != 1);) \
+ stringify_in_c( .error "extable_reg: bad register argument1";) \
+ stringify_in_c(.endif;) \
+ stringify_in_c(.set .Lfound, 0;) \
+ stringify_in_c(.set .Lcurr, 0;) \
+ stringify_in_c(.irp rs,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15;) \
+ stringify_in_c( .ifc "\regaddr", "%%r\rs";) \
+ stringify_in_c( .set .Lfound, 1;) \
+ stringify_in_c( .set .Lregaddr, .Lcurr;) \
+ stringify_in_c( .endif;) \
+ stringify_in_c( .set .Lcurr, .Lcurr+1;) \
+ stringify_in_c(.endr;) \
+ stringify_in_c(.ifne (.Lfound != 1);) \
+ stringify_in_c( .error "extable_reg: bad register argument2";) \
+ stringify_in_c(.endif;) \
+ stringify_in_c(.short .Lregerr << EX_DATA_REG_ERR_SHIFT | \
+ .Lregaddr << EX_DATA_REG_ADDR_SHIFT | \
+ _len << EX_DATA_LEN_SHIFT;) \
+ stringify_in_c(.endm;) \
+ stringify_in_c(extable_reg _regerr,_regaddr;) \
+ stringify_in_c(.purgem extable_reg;) \
+ stringify_in_c(.previous)
+
+#define EX_TABLE(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FIXUP)
+
+#define EX_TABLE_AMODE31(_fault, _target) \
+ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP)
+
+#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0)
+
+#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len)
+
+#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+
+#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 2c057e1f3200..82de2a7c4160 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -26,14 +26,14 @@ static __always_inline void bcr_serialize(void)
asm volatile(__ASM_BCR_SERIALIZE : : : "memory");
}
-#define mb() bcr_serialize()
-#define rmb() barrier()
-#define wmb() barrier()
-#define dma_rmb() mb()
-#define dma_wmb() mb()
-#define __smp_mb() mb()
-#define __smp_rmb() rmb()
-#define __smp_wmb() wmb()
+#define __mb() bcr_serialize()
+#define __rmb() barrier()
+#define __wmb() barrier()
+#define __dma_rmb() __mb()
+#define __dma_wmb() __mb()
+#define __smp_mb() __mb()
+#define __smp_rmb() __rmb()
+#define __smp_wmb() __wmb()
#define __smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 5a530c552c23..2de74fcd0578 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -113,76 +113,71 @@ static inline bool arch_test_and_change_bit(unsigned long nr,
return old & mask;
}
-static inline void arch___set_bit(unsigned long nr, volatile unsigned long *ptr)
+static __always_inline void
+arch___set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
- *addr |= mask;
+ *p |= mask;
}
-static inline void arch___clear_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline void
+arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
- *addr &= ~mask;
+ *p &= ~mask;
}
-static inline void arch___change_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline void
+arch___change_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
- *addr ^= mask;
+ *p ^= mask;
}
-static inline bool arch___test_and_set_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline bool
+arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
unsigned long old;
- old = *addr;
- *addr |= mask;
+ old = *p;
+ *p |= mask;
return old & mask;
}
-static inline bool arch___test_and_clear_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline bool
+arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
unsigned long old;
- old = *addr;
- *addr &= ~mask;
+ old = *p;
+ *p &= ~mask;
return old & mask;
}
-static inline bool arch___test_and_change_bit(unsigned long nr,
- volatile unsigned long *ptr)
+static __always_inline bool
+arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *addr = __bitops_word(nr, ptr);
+ unsigned long *p = __bitops_word(nr, addr);
unsigned long mask = __bitops_mask(nr);
unsigned long old;
- old = *addr;
- *addr ^= mask;
+ old = *p;
+ *p ^= mask;
return old & mask;
}
-static inline bool arch_test_bit(unsigned long nr,
- const volatile unsigned long *ptr)
-{
- const volatile unsigned long *addr = __bitops_word(nr, ptr);
- unsigned long mask = __bitops_mask(nr);
-
- return *addr & mask;
-}
+#define arch_test_bit generic_test_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
static inline bool arch_test_and_set_bit_lock(unsigned long nr,
volatile unsigned long *ptr)
@@ -256,8 +251,6 @@ static inline bool test_bit_inv(unsigned long nr,
return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
-
/**
* __flogr - find leftmost one
* @word - The word to search
@@ -376,18 +369,7 @@ static inline int fls(unsigned int word)
return fls64(word);
}
-#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
-
-#include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffs.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/fls64.h>
-
-#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
-
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 0b25f28351ed..aebe1e22c7be 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -15,7 +15,8 @@
"1: .asciz \""__FILE__"\"\n" \
".previous\n" \
".section __bug_table,\"awM\",@progbits,%2\n" \
- "2: .long 0b-2b,1b-2b\n" \
+ "2: .long 0b-.\n" \
+ " .long 1b-.\n" \
" .short %0,%1\n" \
" .org 2b+%2\n" \
".previous\n" \
@@ -30,7 +31,7 @@
asm_inline volatile( \
"0: mc 0,0\n" \
".section __bug_table,\"awM\",@progbits,%1\n" \
- "1: .long 0b-1b\n" \
+ "1: .long 0b-.\n" \
" .short %0\n" \
" .org 1b+%1\n" \
".previous\n" \
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index d4e90f2ba77e..bd1596810cc1 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -214,7 +214,6 @@ extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
extern void ccw_device_destroy_console(struct ccw_device *);
extern int ccw_device_enable_console(struct ccw_device *);
extern void ccw_device_wait_idle(struct ccw_device *);
-extern int ccw_device_force_console(struct ccw_device *);
extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
extern void ccw_device_dma_free(struct ccw_device *cdev,
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index aa995d91cd1d..11d2fb3de4f5 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -25,7 +25,7 @@ struct ccwgroup_device {
unsigned int count;
struct device dev;
struct work_struct ungroup_work;
- struct ccw_device *cdev[0];
+ struct ccw_device *cdev[];
};
/**
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index ae4d2549cd67..bb48ea380c0d 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -63,7 +63,7 @@ struct chsc_pnso_area {
struct chsc_header response;
u32:32;
struct chsc_pnso_naihdr naihdr;
- struct chsc_pnso_naid_l2 entries[0];
+ struct chsc_pnso_naid_l2 entries[];
} __packed __aligned(PAGE_SIZE);
#endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1effac6a0152..1c4f585dd39b 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -369,7 +369,7 @@ void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
/* Function from drivers/s390/cio/chsc.c */
-int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
int chsc_stzi(void *page, void *result, size_t size);
int chsc_sgib(u32 origin);
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index cdc7ae72529d..a386070f1d56 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -8,10 +8,23 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
+#include <asm/ptrace.h>
#define compat_mode_t compat_mode_t
typedef u16 compat_mode_t;
+#define __compat_uid_t __compat_uid_t
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
+
+#define compat_dev_t compat_dev_t
+typedef u16 compat_dev_t;
+
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16 compat_ipc_pid_t;
+
+#define compat_statfs compat_statfs
+
#include <asm-generic/compat.h>
#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
@@ -22,46 +35,16 @@ typedef u16 compat_mode_t;
(__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
})
-#define PSW32_MASK_PER 0x40000000UL
-#define PSW32_MASK_DAT 0x04000000UL
-#define PSW32_MASK_IO 0x02000000UL
-#define PSW32_MASK_EXT 0x01000000UL
-#define PSW32_MASK_KEY 0x00F00000UL
-#define PSW32_MASK_BASE 0x00080000UL /* Always one */
-#define PSW32_MASK_MCHECK 0x00040000UL
-#define PSW32_MASK_WAIT 0x00020000UL
-#define PSW32_MASK_PSTATE 0x00010000UL
-#define PSW32_MASK_ASC 0x0000C000UL
-#define PSW32_MASK_CC 0x00003000UL
-#define PSW32_MASK_PM 0x00000f00UL
-#define PSW32_MASK_RI 0x00000080UL
-
#define PSW32_MASK_USER 0x0000FF00UL
-#define PSW32_ADDR_AMODE 0x80000000UL
-#define PSW32_ADDR_INSN 0x7FFFFFFFUL
-
-#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20)
-
-#define PSW32_ASC_PRIMARY 0x00000000UL
-#define PSW32_ASC_ACCREG 0x00004000UL
-#define PSW32_ASC_SECONDARY 0x00008000UL
-#define PSW32_ASC_HOME 0x0000C000UL
-
#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
PSW32_ASC_PRIMARY)
-#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
-typedef u16 __compat_uid_t;
-typedef u16 __compat_gid_t;
-typedef u16 compat_dev_t;
typedef u16 compat_nlink_t;
-typedef u16 compat_ipc_pid_t;
-typedef __kernel_fsid_t compat_fsid_t;
typedef struct {
u32 mask;
@@ -102,26 +85,6 @@ struct compat_stat {
u32 __unused5;
};
-struct compat_flock {
- short l_type;
- short l_whence;
- compat_off_t l_start;
- compat_off_t l_len;
- compat_pid_t l_pid;
-};
-
-#define F_GETLK64 12
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
-struct compat_flock64 {
- short l_type;
- short l_whence;
- compat_loff_t l_start;
- compat_loff_t l_len;
- compat_pid_t l_pid;
-};
-
struct compat_statfs {
u32 f_type;
u32 f_bsize;
@@ -152,10 +115,6 @@ struct compat_statfs64 {
u32 f_spare[4];
};
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-
/*
* A pointer passed in from user mode. This should not
* be used for syscall parameters, just declare them
@@ -178,61 +137,4 @@ static inline int is_compat_task(void)
#endif
-struct compat_ipc64_perm {
- compat_key_t key;
- __compat_uid32_t uid;
- __compat_gid32_t gid;
- __compat_uid32_t cuid;
- __compat_gid32_t cgid;
- compat_mode_t mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- unsigned int __unused1;
- unsigned int __unused2;
-};
-
-struct compat_semid64_ds {
- struct compat_ipc64_perm sem_perm;
- compat_ulong_t sem_otime;
- compat_ulong_t sem_otime_high;
- compat_ulong_t sem_ctime;
- compat_ulong_t sem_ctime_high;
- compat_ulong_t sem_nsems;
- compat_ulong_t __unused1;
- compat_ulong_t __unused2;
-};
-
-struct compat_msqid64_ds {
- struct compat_ipc64_perm msg_perm;
- compat_ulong_t msg_stime;
- compat_ulong_t msg_stime_high;
- compat_ulong_t msg_rtime;
- compat_ulong_t msg_rtime_high;
- compat_ulong_t msg_ctime;
- compat_ulong_t msg_ctime_high;
- compat_ulong_t msg_cbytes;
- compat_ulong_t msg_qnum;
- compat_ulong_t msg_qbytes;
- compat_pid_t msg_lspid;
- compat_pid_t msg_lrpid;
- compat_ulong_t __unused1;
- compat_ulong_t __unused2;
-};
-
-struct compat_shmid64_ds {
- struct compat_ipc64_perm shm_perm;
- compat_size_t shm_segsz;
- compat_ulong_t shm_atime;
- compat_ulong_t shm_atime_high;
- compat_ulong_t shm_dtime;
- compat_ulong_t shm_dtime_high;
- compat_ulong_t shm_ctime;
- compat_ulong_t shm_ctime_high;
- compat_pid_t shm_cpid;
- compat_pid_t shm_lpid;
- compat_ulong_t shm_nattch;
- compat_ulong_t __unused1;
- compat_ulong_t __unused2;
-};
#endif /* _ASM_S390X_COMPAT_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 0d90cbeb89b4..feaba12dbecb 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -10,6 +10,7 @@
#define _ASM_S390_CPU_MF_H
#include <linux/errno.h>
+#include <asm/asm-extable.h>
#include <asm/facility.h>
asm(".include \"asm/cpu_mf-insn.h\"\n");
@@ -109,7 +110,9 @@ struct hws_basic_entry {
unsigned int AS:2; /* 29-30 PSW address-space control */
unsigned int I:1; /* 31 entry valid or invalid */
unsigned int CL:2; /* 32-33 Configuration Level */
- unsigned int:14;
+ unsigned int H:1; /* 34 Host Indicator */
+ unsigned int LS:1; /* 35 Limited Sampling */
+ unsigned int:12;
unsigned int prim_asn:16; /* primary ASN */
unsigned long long ia; /* Instruction Address */
unsigned long long gpp; /* Guest Program Parameter */
@@ -157,7 +160,7 @@ struct hws_trailer_entry {
/* Load program parameter */
static inline void lpp(void *pp)
{
- asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory");
+ asm volatile("lpp 0(%0)\n" :: "a" (pp) : "memory");
}
/* Query counter information */
@@ -166,7 +169,7 @@ static inline int qctri(struct cpumf_ctr_info *info)
int rc = -EINVAL;
asm volatile (
- "0: .insn s,0xb28e0000,%1\n"
+ "0: qctri %1\n"
"1: lhi %0,0\n"
"2:\n"
EX_TABLE(1b, 2b)
@@ -180,7 +183,7 @@ static inline int lcctl(u64 ctl)
int cc;
asm volatile (
- " .insn s,0xb2840000,%1\n"
+ " lcctl %1\n"
" ipm %0\n"
" srl %0,28\n"
: "=d" (cc) : "Q" (ctl) : "cc");
@@ -194,7 +197,7 @@ static inline int __ecctr(u64 ctr, u64 *content)
int cc;
asm volatile (
- " .insn rre,0xb2e40000,%0,%2\n"
+ " ecctr %0,%2\n"
" ipm %1\n"
" srl %1,28\n"
: "=d" (_content), "=d" (cc) : "d" (ctr) : "cc");
@@ -244,7 +247,7 @@ static inline int qsi(struct hws_qsi_info_block *info)
int cc = 1;
asm volatile(
- "0: .insn s,0xb2860000,%1\n"
+ "0: qsi %1\n"
"1: lhi %0,0\n"
"2:\n"
EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
@@ -259,7 +262,7 @@ static inline int lsctl(struct hws_lsctl_request_block *req)
cc = 1;
asm volatile(
- "0: .insn s,0xb2870000,0(%1)\n"
+ "0: lsctl 0(%1)\n"
"1: ipm %0\n"
" srl %0,28\n"
"2:\n"
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 14cfd48d598e..931204613753 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -2,28 +2,21 @@
/*
* Module interface for CPU features
*
- * Copyright IBM Corp. 2015
+ * Copyright IBM Corp. 2015, 2022
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
#ifndef __ASM_S390_CPUFEATURE_H
#define __ASM_S390_CPUFEATURE_H
-#include <asm/elf.h>
+enum {
+ S390_CPU_FEATURE_MSA,
+ S390_CPU_FEATURE_VXRS,
+ S390_CPU_FEATURE_UV,
+ MAX_CPU_FEATURES
+};
-/* Hardware features on Linux on z Systems are indicated by facility bits that
- * are mapped to the so-called machine flags. Particular machine flags are
- * then used to define ELF hardware capabilities; most notably hardware flags
- * that are essential for user space / glibc.
- *
- * Restrict the set of exposed CPU features to ELF hardware capabilities for
- * now. Additional machine flags can be indicated by values larger than
- * MAX_ELF_HWCAP_FEATURES.
- */
-#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
-#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
-
-#define cpu_feature(feat) ilog2(HWCAP_ ## feat)
+#define cpu_feature(feature) (feature)
int cpu_have_feature(unsigned int nr);
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
index c6ebfd31f1db..97456d98fe76 100644
--- a/arch/s390/include/asm/crw.h
+++ b/arch/s390/include/asm/crw.h
@@ -5,7 +5,6 @@
* Author(s): Ingo Adlung <adlung@de.ibm.com>,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
* Cornelia Huck <cornelia.huck@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#ifndef _ASM_S390_CRW_H
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 04dc65f8901d..adf7d8cdac7e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -12,6 +12,8 @@
#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
+#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38)
+#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39)
#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
@@ -72,8 +74,17 @@ static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
__ctl_load(reg, cr, cr);
}
-void smp_ctl_set_bit(int cr, int bit);
-void smp_ctl_clear_bit(int cr, int bit);
+void smp_ctl_set_clear_bit(int cr, int bit, bool set);
+
+static inline void ctl_set_bit(int cr, int bit)
+{
+ smp_ctl_set_clear_bit(cr, bit, true);
+}
+
+static inline void ctl_clear_bit(int cr, int bit)
+{
+ smp_ctl_set_clear_bit(cr, bit, false);
+}
union ctlreg0 {
unsigned long val;
@@ -82,7 +93,10 @@ union ctlreg0 {
unsigned long tcx : 1; /* Transactional-Execution control */
unsigned long pifo : 1; /* Transactional-Execution Program-
Interruption-Filtering Override */
- unsigned long : 22;
+ unsigned long : 3;
+ unsigned long ccc : 1; /* Cryptography counter control */
+ unsigned long pec : 1; /* PAI extension control */
+ unsigned long : 17;
unsigned long : 3;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
@@ -128,8 +142,5 @@ union ctlreg15 {
};
};
-#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
-#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
-
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index b3a8cb4daed6..56e99c286d12 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -11,6 +11,7 @@
#include <linux/if_ether.h>
#include <linux/percpu.h>
+#include <asm/asm-extable.h>
enum diag_stat_enum {
DIAG_STAT_X008,
@@ -47,8 +48,8 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
{
unsigned long start_addr, end_addr;
- start_addr = start_pfn << PAGE_SHIFT;
- end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+ start_addr = pfn_to_phys(start_pfn);
+ end_addr = pfn_to_phys(start_pfn + num_pfn - 1);
diag_stat_inc(DIAG_STAT_X010);
asm volatile(
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
index 6f26f35d4a71..dec1c4ce628c 100644
--- a/arch/s390/include/asm/dma.h
+++ b/arch/s390/include/asm/dma.h
@@ -11,10 +11,4 @@
*/
#define MAX_DMA_ADDRESS 0x80000000
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
#endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 445fe4c8184a..06f795855af7 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -78,7 +78,7 @@ struct aob {
struct aob_rq_header {
struct scm_device *scmdev;
- char data[0];
+ char data[];
};
struct scm_device {
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 17aead80aadb..000de2b1e67a 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -5,24 +5,25 @@
#include <linux/sched.h>
#include <linux/audit.h>
#include <linux/randomize_kstack.h>
-#include <linux/tracehook.h>
#include <linux/processor.h>
#include <linux/uaccess.h>
#include <asm/timex.h>
#include <asm/fpu/api.h>
+#include <asm/pai.h>
#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
void do_per_trap(struct pt_regs *regs);
-#ifdef CONFIG_DEBUG_ENTRY
-static __always_inline void arch_check_user_regs(struct pt_regs *regs)
+static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
{
- debug_user_asce(0);
+ if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+ debug_user_asce(0);
+
+ pai_kernel_enter(regs);
}
-#define arch_check_user_regs arch_check_user_regs
-#endif /* CONFIG_DEBUG_ENTRY */
+#define arch_enter_from_user_mode arch_enter_from_user_mode
static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
unsigned long ti_work)
@@ -45,6 +46,8 @@ static __always_inline void arch_exit_to_user_mode(void)
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
debug_user_asce(1);
+
+ pai_kernel_exit(current_pt_regs());
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
@@ -59,7 +62,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static inline bool on_thread_stack(void)
{
- return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1));
+ return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
}
#endif
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index 16dc57dd90b3..af6ba52743e9 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -25,7 +25,7 @@
struct exception_table_entry
{
int insn, fixup;
- long handler;
+ short type, data;
};
extern struct exception_table_entry *__start_amode31_ex_table;
@@ -38,28 +38,6 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
return (unsigned long)&x->fixup + x->fixup;
}
-typedef bool (*ex_handler_t)(const struct exception_table_entry *,
- struct pt_regs *);
-
-static inline ex_handler_t
-ex_fixup_handler(const struct exception_table_entry *x)
-{
- if (likely(!x->handler))
- return NULL;
- return (ex_handler_t)((unsigned long)&x->handler + x->handler);
-}
-
-static inline bool ex_handle(const struct exception_table_entry *x,
- struct pt_regs *regs)
-{
- ex_handler_t handler = ex_fixup_handler(x);
-
- if (unlikely(handler))
- return handler(x, regs);
- regs->psw.addr = extable_fixup(x);
- return true;
-}
-
#define ARCH_HAS_RELATIVE_EXTABLE
static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
@@ -69,8 +47,26 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
{
a->fixup = b->fixup + delta;
b->fixup = tmp.fixup - delta;
- a->handler = b->handler + delta;
- b->handler = tmp.handler - delta;
+ a->type = b->type;
+ b->type = tmp.type;
+ a->data = b->data;
+ b->data = tmp.data;
}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
+#ifdef CONFIG_BPF_JIT
+
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
+
+#else /* !CONFIG_BPF_JIT */
+
+static inline bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ return false;
+}
+
+#endif /* CONFIG_BPF_JIT */
+
+bool fixup_exception(struct pt_regs *regs);
#endif
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index cff0749e9657..b8a028a36173 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -214,7 +214,7 @@ struct dcw_intrg_data {
u32 :32;
u64 time;
u64 prog_id;
- u8 prog_data[0];
+ u8 prog_data[];
} __attribute__ ((packed));
#define DCW_FLAGS_CC (1 << (7 - 1))
@@ -241,7 +241,7 @@ struct dcw {
u32 :8;
u32 cd_count:8;
u32 count;
- u8 cd[0];
+ u8 cd[];
} __attribute__ ((packed));
#define TCCB_FORMAT_DEFAULT 0x7f
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index a959b815a58b..b714ed0ef688 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -45,6 +45,7 @@
#define _ASM_S390_FPU_API_H
#include <linux/preempt.h>
+#include <asm/asm-extable.h>
void save_fpu_regs(void);
void load_fpu_regs(void);
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 267f70f4393f..6f80ec9c04be 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -47,15 +47,17 @@ struct ftrace_regs {
static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
- return &fregs->regs;
+ struct pt_regs *regs = &fregs->regs;
+
+ if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS))
+ return regs;
+ return NULL;
}
static __always_inline void ftrace_instruction_pointer_set(struct ftrace_regs *fregs,
unsigned long ip)
{
- struct pt_regs *regs = arch_ftrace_get_regs(fregs);
-
- regs->psw.addr = ip;
+ fregs->regs.psw.addr = ip;
}
/*
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index c22debfcebf1..eaeaeb3ff0be 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -4,6 +4,7 @@
#include <linux/uaccess.h>
#include <linux/futex.h>
+#include <asm/asm-extable.h>
#include <asm/mmu_context.h>
#include <asm/errno.h>
@@ -16,7 +17,8 @@
"3: jl 1b\n" \
" lhi %0,0\n" \
"4: sacf 768\n" \
- EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
+ EX_TABLE(0b,4b) EX_TABLE(1b,4b) \
+ EX_TABLE(2b,4b) EX_TABLE(3b,4b) \
: "=d" (ret), "=&d" (oldval), "=&d" (newval), \
"=m" (*uaddr) \
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 40264f60b0da..5cc46e0dde62 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -147,5 +147,42 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
int gmap_mark_unmergeable(void);
-void s390_reset_acc(struct mm_struct *mm);
+void s390_unlist_old_asce(struct gmap *gmap);
+int s390_replace_asce(struct gmap *gmap);
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool interruptible);
+
+/**
+ * s390_uv_destroy_range - Destroy a range of pages in the given mm.
+ * @mm: the mm on which to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_sched, so it should not generate stalls, but
+ * it will otherwise only return when it completed.
+ */
+static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ (void)__s390_uv_destroy_range(mm, start, end, false);
+}
+
+/**
+ * s390_uv_destroy_range_interruptible - Destroy a range of pages in the
+ * given mm, but stop when a fatal signal is received.
+ * @mm: the mm on which to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ *
+ * This function will call cond_sched, so it should not generate stalls. If
+ * a fatal signal is received, it will return with -EINTR immediately,
+ * without finishing destroying the whole range. Upon successful
+ * completion, 0 is returned.
+ */
+static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ return __s390_uv_destroy_range(mm, start, end, true);
+}
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 60f9241e5e4a..ccdbccfde148 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -28,9 +28,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
static inline int prepare_hugepage_range(struct file *file,
unsigned long addr, unsigned long len)
{
- if (len & ~HPAGE_MASK)
+ struct hstate *h = hstate_file(file);
+
+ if (len & ~huge_page_mask(h))
return -EINVAL;
- if (addr & ~HPAGE_MASK)
+ if (addr & ~huge_page_mask(h))
return -EINVAL;
return 0;
}
@@ -45,15 +47,15 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
{
if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- pte_val(*ptep) = _REGION3_ENTRY_EMPTY;
+ set_pte(ptep, __pte(_REGION3_ENTRY_EMPTY));
else
- pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
+ set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY));
}
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
- huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+ return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
}
static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
@@ -85,6 +87,11 @@ static inline int huge_pte_none(pte_t pte)
return pte_none(pte);
}
+static inline int huge_pte_none_mostly(pte_t pte)
+{
+ return huge_pte_none(pte);
+}
+
static inline int huge_pte_write(pte_t pte)
{
return pte_write(pte);
@@ -115,6 +122,21 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
return pte_modify(pte, newprot);
}
+static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
+{
+ return pte;
+}
+
+static inline pte_t huge_pte_clear_uffd_wp(pte_t pte)
+{
+ return pte;
+}
+
+static inline int huge_pte_uffd_wp(pte_t pte)
+{
+ return 0;
+}
+
static inline bool gigantic_page_runtime_supported(void)
{
return true;
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 6fb7aced104a..40eae2c08d61 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -108,7 +108,7 @@ clear_normalized_cda(struct ccw1 * ccw)
struct idal_buffer {
size_t size;
size_t page_order;
- void *data[0];
+ void *data[];
};
/*
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 3f8ee257f9aa..a405b6bb89fb 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -133,6 +133,8 @@ int ipl_report_add_certificate(struct ipl_report *report, void *key,
* DIAG 308 support
*/
enum diag308_subcode {
+ DIAG308_CLEAR_RESET = 0,
+ DIAG308_LOAD_NORMAL_RESET = 1,
DIAG308_REL_HSA = 2,
DIAG308_LOAD_CLEAR = 3,
DIAG308_LOAD_NORMAL_DUMP = 4,
@@ -141,6 +143,10 @@ enum diag308_subcode {
DIAG308_LOAD_NORMAL = 7,
};
+enum diag308_subcode_flags {
+ DIAG308_FLAG_EI = 1UL << 16,
+};
+
enum diag308_rc {
DIAG308_RC_OK = 0x0001,
DIAG308_RC_NOCONFIG = 0x0102,
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 9f75d67b8c20..89902f754740 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -81,8 +81,13 @@ static __always_inline void inc_irq_stat(enum interruption_class irq)
}
struct ext_code {
- unsigned short subcode;
- unsigned short code;
+ union {
+ struct {
+ unsigned short subcode;
+ unsigned short code;
+ };
+ unsigned int int_code;
+ };
};
typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 916cfcb36d8a..895f774bbcc5 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -10,7 +10,6 @@
#include <linux/stringify.h>
#define JUMP_LABEL_NOP_SIZE 6
-#define JUMP_LABEL_NOP_OFFSET 2
#ifdef CONFIG_CC_IS_CLANG
#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
@@ -21,12 +20,12 @@
#endif
/*
- * We use a brcl 0,2 instruction for jump labels at compile time so it
+ * We use a brcl 0,<offset> instruction for jump labels so it
* can be easily distinguished from a hotpatch generated instruction.
*/
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+ asm_volatile_goto("0: brcl 0,%l[label]\n"
".pushsection __jump_table,\"aw\"\n"
".balign 8\n"
".long 0b-.,%l[label]-.\n"
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 7f3c9ac34bd8..1bd08eb56d5f 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -9,6 +9,8 @@
#ifndef _S390_KEXEC_H
#define _S390_KEXEC_H
+#include <linux/module.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/setup.h>
@@ -29,7 +31,7 @@
#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
/* Allocate control page with GFP_DMA */
-#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA
+#define KEXEC_CONTROL_MEMORY_GFP (GFP_DMA | __GFP_NORETRY)
/* Maximum address we can use for the crash control pages */
#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
@@ -83,4 +85,26 @@ struct kimage_arch {
extern const struct kexec_file_ops s390_kexec_image_ops;
extern const struct kexec_file_ops s390_kexec_elf_ops;
+#ifdef CONFIG_CRASH_DUMP
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+
+void arch_kexec_protect_crashkres(void);
+#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
+
+void arch_kexec_unprotect_crashkres(void);
+#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
+#endif
+
+#ifdef CONFIG_KEXEC_FILE
+struct purgatory_info;
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section,
+ const Elf_Shdr *relsec,
+ const Elf_Shdr *symtab);
+#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+#endif
#endif /*_S390_KEXEC_H */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 5eb722c984e4..598095f4b924 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -71,6 +71,7 @@ struct kprobe_ctlblk {
void arch_remove_kprobe(struct kprobe *p);
void __kretprobe_trampoline(void);
+void trampoline_probe_handler(struct pt_regs *regs);
int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
int kprobe_exceptions_notify(struct notifier_block *self,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a604d51acfc8..b1e98a9ed152 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/mmu_notifier.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@@ -45,6 +47,8 @@
#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
#define KVM_REQ_VSIE_RESTART KVM_ARCH_REQ(5)
+#define KVM_REQ_REFRESH_GUEST_PREFIX \
+ KVM_ARCH_REQ_FLAGS(6, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -91,19 +95,30 @@ union ipte_control {
};
};
+union sca_utility {
+ __u16 val;
+ struct {
+ __u16 mtcr : 1;
+ __u16 reserved : 15;
+ };
+};
+
struct bsca_block {
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
- __u64 reserved2;
+ union sca_utility utility;
+ __u8 reserved2[6];
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
};
struct esca_block {
union ipte_control ipte_control;
- __u64 reserved1[7];
+ __u64 reserved1[6];
+ union sca_utility utility;
+ __u8 reserved2[6];
__u64 mcn[4];
- __u64 reserved2[20];
+ __u64 reserved3[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
};
@@ -247,12 +262,16 @@ struct kvm_s390_sie_block {
#define ECB_SPECI 0x08
#define ECB_SRSI 0x04
#define ECB_HOSTPROTINT 0x02
+#define ECB_PTF 0x01
__u8 ecb; /* 0x0061 */
#define ECB2_CMMA 0x80
#define ECB2_IEP 0x20
#define ECB2_PFMFI 0x08
#define ECB2_ESCA 0x04
+#define ECB2_ZPCI_LSI 0x02
__u8 ecb2; /* 0x0062 */
+#define ECB3_AISI 0x20
+#define ECB3_AISII 0x10
#define ECB3_DEA 0x08
#define ECB3_AES 0x04
#define ECB3_RI 0x01
@@ -757,6 +776,7 @@ struct kvm_vm_stat {
u64 inject_pfault_done;
u64 inject_service_signal;
u64 inject_virtio;
+ u64 aen_forward;
};
struct kvm_arch_memory_slot {
@@ -921,6 +941,8 @@ struct kvm_s390_pv {
u64 guest_len;
unsigned long stor_base;
void *stor_var;
+ bool dumping;
+ struct mmu_notifier mmu_notifier;
};
struct kvm_arch{
@@ -937,6 +959,7 @@ struct kvm_arch{
int use_cmma;
int use_pfmfi;
int use_skf;
+ int use_zpci_interp;
int user_cpu_state_ctrl;
int user_sigp;
int user_stsi;
@@ -960,6 +983,8 @@ struct kvm_arch{
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
struct kvm_s390_pv pv;
+ struct list_head kzdev_list;
+ spinlock_t kzdev_list_lock;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -1010,6 +1035,14 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
+#define __KVM_HAVE_ARCH_VM_FREE
+void kvm_arch_free_vm(struct kvm *kvm);
+
+struct zpci_kvm_hook {
+ int (*kvm_register)(void *opaque, struct kvm *kvm);
+ void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
#endif
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 1ffea75b8ebc..c76777b15fec 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -2,27 +2,9 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
-#include <asm/asm-const.h>
#include <linux/stringify.h>
#define __ALIGN .align 16, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
-/*
- * Helper macro for exception table entries
- */
-
-#define __EX_TABLE(_section, _fault, _target) \
- stringify_in_c(.section _section,"a";) \
- stringify_in_c(.align 8;) \
- stringify_in_c(.long (_fault) - .;) \
- stringify_in_c(.long (_target) - .;) \
- stringify_in_c(.quad 0;) \
- stringify_in_c(.previous)
-
-#define EX_TABLE(_fault, _target) \
- __EX_TABLE(__ex_table, _fault, _target)
-#define EX_TABLE_AMODE31(_fault, _target) \
- __EX_TABLE(.amode31.ex_table, _fault, _target)
-
#endif
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
deleted file mode 100644
index 5209f223331a..000000000000
--- a/arch/s390/include/asm/livepatch.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * livepatch.h - s390-specific Kernel Live Patching Core
- *
- * Copyright (c) 2013-2015 SUSE
- * Authors: Jiri Kosina
- * Vojtech Pavlik
- * Jiri Slaby
- */
-
-#ifndef ASM_LIVEPATCH_H
-#define ASM_LIVEPATCH_H
-
-#include <linux/ftrace.h>
-#include <asm/ptrace.h>
-
-static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip)
-{
- ftrace_instruction_pointer_set(fregs, ip);
-}
-
-#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 1262f5003acf..8aa1f6530a3e 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -34,12 +34,22 @@ struct lowcore {
__u32 ext_int_code_addr;
};
__u32 svc_int_code; /* 0x0088 */
- __u16 pgm_ilc; /* 0x008c */
- __u16 pgm_code; /* 0x008e */
+ union {
+ struct {
+ __u16 pgm_ilc; /* 0x008c */
+ __u16 pgm_code; /* 0x008e */
+ };
+ __u32 pgm_int_code;
+ };
__u32 data_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
- __u8 per_code; /* 0x0096 */
- __u8 per_atmid; /* 0x0097 */
+ union {
+ struct {
+ __u8 per_code; /* 0x0096 */
+ __u8 per_atmid; /* 0x0097 */
+ };
+ __u16 per_code_combined;
+ };
__u64 per_address; /* 0x0098 */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
@@ -153,11 +163,9 @@ struct lowcore {
__u64 gmap; /* 0x03d0 */
__u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */
- /* br %r1 trampoline */
- __u16 br_r1_trampoline; /* 0x0400 */
- __u32 return_lpswe; /* 0x0402 */
- __u32 return_mcck_lpswe; /* 0x0406 */
- __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */
+ __u32 return_lpswe; /* 0x0400 */
+ __u32 return_mcck_lpswe; /* 0x0404 */
+ __u8 pad_0x040a[0x0e00-0x0408]; /* 0x0408 */
/*
* 0xe00 contains the address of the IPL Parameter Information
@@ -192,7 +200,12 @@ struct lowcore {
__u64 last_break_save_area; /* 0x1338 */
__u32 access_regs_save_area[16]; /* 0x1340 */
__u64 cregs_save_area[16]; /* 0x1380 */
- __u8 pad_0x1400[0x1800-0x1400]; /* 0x1400 */
+ __u8 pad_0x1400[0x1500-0x1400]; /* 0x1400 */
+ /* Cryptography-counter designation */
+ __u64 ccd; /* 0x1500 */
+ /* AI-extension counter designation */
+ __u64 aicd; /* 0x1508 */
+ __u8 pad_0x1510[0x1800-0x1510]; /* 0x1510 */
/* Transaction abort diagnostic block */
struct pgm_tdb pgm_tdb; /* 0x1800 */
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
new file mode 100644
index 000000000000..c7fa838cf6b9
--- /dev/null
+++ b/arch/s390/include/asm/maccess.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_MACCESS_H
+#define __ASM_S390_MACCESS_H
+
+#include <linux/types.h>
+
+struct iov_iter;
+
+extern unsigned long __memcpy_real_area;
+void memcpy_real_init(void);
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
+int memcpy_real(void *dest, unsigned long src, size_t count);
+#ifdef CONFIG_CRASH_DUMP
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+#endif
+
+#endif /* __ASM_S390_MACCESS_H */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index e12ff0f29d1a..829d68e2c685 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -4,6 +4,7 @@
#include <linux/cpumask.h>
#include <linux/errno.h>
+#include <asm/asm-extable.h>
typedef struct {
spinlock_t lock;
@@ -17,7 +18,7 @@ typedef struct {
unsigned long asce_limit;
unsigned long vdso_base;
/* The mmu context belongs to a secure guest. */
- atomic_t is_protected;
+ atomic_t protected_count;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
@@ -41,18 +42,4 @@ typedef struct {
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
-static inline int tprot(unsigned long addr)
-{
- int rc = -EFAULT;
-
- asm volatile(
- " tprot 0(%1),0\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+d" (rc) : "a" (addr) : "cc");
- return rc;
-}
-
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c7937f369e62..2a38af5a00c2 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -26,7 +26,7 @@ static inline int init_new_context(struct task_struct *tsk,
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
- atomic_set(&mm->context.is_protected, 0);
+ atomic_set(&mm->context.protected_count, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 2db45d7e68aa..af1cd3a6f406 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -6,7 +6,6 @@
* Author(s): Ingo Adlung <adlung@de.ibm.com>,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
* Cornelia Huck <cornelia.huck@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#ifndef _ASM_S390_NMI_H
@@ -98,11 +97,11 @@ struct mcesa {
struct pt_regs;
-void nmi_alloc_boot_cpu(struct lowcore *lc);
-int nmi_alloc_per_cpu(struct lowcore *lc);
-void nmi_free_per_cpu(struct lowcore *lc);
+void nmi_alloc_mcesa_early(u64 *mcesad);
+int nmi_alloc_mcesa(u64 *mcesad);
+void nmi_free_mcesa(u64 *mcesad);
-void s390_handle_mcck(void);
+void s390_handle_mcck(struct pt_regs *regs);
void __s390_handle_mcck(void);
int s390_do_machine_check(struct pt_regs *regs);
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 0033dcd663b1..7e9e99523e95 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -2,23 +2,24 @@
#ifndef _ASM_S390_NOSPEC_ASM_H
#define _ASM_S390_NOSPEC_ASM_H
-#include <asm/alternative-asm.h>
-#include <asm/asm-offsets.h>
#include <asm/dwarf.h>
#ifdef __ASSEMBLY__
#ifdef CC_USING_EXPOLINE
-_LC_BR_R1 = __LC_BR_R1
-
/*
* The expoline macros are used to create thunks in the same format
* as gcc generates them. The 'comdat' section flag makes sure that
* the various thunks are merged into a single copy.
*/
.macro __THUNK_PROLOG_NAME name
+#ifdef CONFIG_EXPOLINE_EXTERN
+ .pushsection .text,"ax",@progbits
+ .align 16,0x07
+#else
.pushsection .text.\name,"axG",@progbits,\name,comdat
+#endif
.globl \name
.hidden \name
.type \name,@function
@@ -26,167 +27,101 @@ _LC_BR_R1 = __LC_BR_R1
CFI_STARTPROC
.endm
- .macro __THUNK_EPILOG
+ .macro __THUNK_EPILOG_NAME name
CFI_ENDPROC
+#ifdef CONFIG_EXPOLINE_EXTERN
+ .size \name, .-\name
+#endif
.popsection
.endm
- .macro __THUNK_PROLOG_BR r1,r2
- __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1
- .endm
-
- .macro __THUNK_PROLOG_BC d0,r1,r2
- __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1
+ .macro __THUNK_PROLOG_BR r1
+ __THUNK_PROLOG_NAME __s390_indirect_jump_r\r1
.endm
- .macro __THUNK_BR r1,r2
- jg __s390_indirect_jump_r\r2\()use_r\r1
+ .macro __THUNK_EPILOG_BR r1
+ __THUNK_EPILOG_NAME __s390_indirect_jump_r\r1
.endm
- .macro __THUNK_BC d0,r1,r2
- jg __s390_indirect_branch_\d0\()_\r2\()use_\r1
+ .macro __THUNK_BR r1
+ jg __s390_indirect_jump_r\r1
.endm
- .macro __THUNK_BRASL r1,r2,r3
- brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2
+ .macro __THUNK_BRASL r1,r2
+ brasl \r1,__s390_indirect_jump_r\r2
.endm
- .macro __DECODE_RR expand,reg,ruse
- .set __decode_fail,1
+ .macro __DECODE_R expand,reg
+ .set .L__decode_fail,1
.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.ifc \reg,%r\r1
- .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
- .ifc \ruse,%r\r2
- \expand \r1,\r2
- .set __decode_fail,0
- .endif
- .endr
+ \expand \r1
+ .set .L__decode_fail,0
.endif
.endr
- .if __decode_fail == 1
- .error "__DECODE_RR failed"
+ .if .L__decode_fail == 1
+ .error "__DECODE_R failed"
.endif
.endm
- .macro __DECODE_RRR expand,rsave,rtarget,ruse
- .set __decode_fail,1
+ .macro __DECODE_RR expand,rsave,rtarget
+ .set .L__decode_fail,1
.irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.ifc \rsave,%r\r1
.irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.ifc \rtarget,%r\r2
- .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
- .ifc \ruse,%r\r3
- \expand \r1,\r2,\r3
- .set __decode_fail,0
- .endif
- .endr
- .endif
- .endr
- .endif
- .endr
- .if __decode_fail == 1
- .error "__DECODE_RRR failed"
- .endif
- .endm
-
- .macro __DECODE_DRR expand,disp,reg,ruse
- .set __decode_fail,1
- .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
- .ifc \reg,%r\r1
- .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
- .ifc \ruse,%r\r2
- \expand \disp,\r1,\r2
- .set __decode_fail,0
+ \expand \r1,\r2
+ .set .L__decode_fail,0
.endif
.endr
.endif
.endr
- .if __decode_fail == 1
- .error "__DECODE_DRR failed"
+ .if .L__decode_fail == 1
+ .error "__DECODE_RR failed"
.endif
.endm
- .macro __THUNK_EX_BR reg,ruse
- # Be very careful when adding instructions to this macro!
- # The ALTERNATIVE replacement code has a .+10 which targets
- # the "br \reg" after the code has been patched.
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ .macro __THUNK_EX_BR reg
exrl 0,555f
j .
-#else
- .ifc \reg,%r1
- ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35
- j .
- .else
- larl \ruse,555f
- ex 0,0(\ruse)
- j .
- .endif
-#endif
555: br \reg
.endm
- .macro __THUNK_EX_BC disp,reg,ruse
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
- exrl 0,556f
- j .
+#ifdef CONFIG_EXPOLINE_EXTERN
+ .macro GEN_BR_THUNK reg
+ .endm
+ .macro GEN_BR_THUNK_EXTERN reg
#else
- larl \ruse,556f
- ex 0,0(\ruse)
- j .
+ .macro GEN_BR_THUNK reg
#endif
-556: b \disp(\reg)
- .endm
-
- .macro GEN_BR_THUNK reg,ruse=%r1
- __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
- __THUNK_EX_BR \reg,\ruse
- __THUNK_EPILOG
- .endm
-
- .macro GEN_B_THUNK disp,reg,ruse=%r1
- __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse
- __THUNK_EX_BC \disp,\reg,\ruse
- __THUNK_EPILOG
+ __DECODE_R __THUNK_PROLOG_BR,\reg
+ __THUNK_EX_BR \reg
+ __DECODE_R __THUNK_EPILOG_BR,\reg
.endm
- .macro BR_EX reg,ruse=%r1
-557: __DECODE_RR __THUNK_BR,\reg,\ruse
+ .macro BR_EX reg
+557: __DECODE_R __THUNK_BR,\reg
.pushsection .s390_indirect_branches,"a",@progbits
.long 557b-.
.popsection
.endm
- .macro B_EX disp,reg,ruse=%r1
-558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 558b-.
- .popsection
- .endm
-
- .macro BASR_EX rsave,rtarget,ruse=%r1
-559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse
+ .macro BASR_EX rsave,rtarget
+559: __DECODE_RR __THUNK_BRASL,\rsave,\rtarget
.pushsection .s390_indirect_branches,"a",@progbits
.long 559b-.
.popsection
.endm
#else
- .macro GEN_BR_THUNK reg,ruse=%r1
- .endm
-
- .macro GEN_B_THUNK disp,reg,ruse=%r1
+ .macro GEN_BR_THUNK reg
.endm
- .macro BR_EX reg,ruse=%r1
+ .macro BR_EX reg
br \reg
.endm
- .macro B_EX disp,reg,ruse=%r1
- b \disp(\reg)
- .endm
-
- .macro BASR_EX rsave,rtarget,ruse=%r1
+ .macro BASR_EX rsave,rtarget
basr \rsave,\rtarget
.endm
#endif /* CC_USING_EXPOLINE */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 3c89279d2a4b..0d1c74a7a650 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -8,6 +8,8 @@
#ifndef _ASM_S390_OS_INFO_H
#define _ASM_S390_OS_INFO_H
+#include <linux/uio.h>
+
#define OS_INFO_VERSION_MAJOR 1
#define OS_INFO_VERSION_MINOR 1
#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
@@ -39,7 +41,6 @@ u32 os_info_csum(struct os_info *os_info);
#ifdef CONFIG_CRASH_DUMP
void *os_info_old_entry(int nr, unsigned long *size);
-int copy_oldmem_kernel(void *dst, void *src, size_t count);
#else
static inline void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index d98d17a36c7b..61dea67bb9c7 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -20,6 +20,8 @@
#define PAGE_SIZE _PAGE_SIZE
#define PAGE_MASK _PAGE_MASK
#define PAGE_DEFAULT_ACC 0
+/* storage-protection override */
+#define PAGE_SPO_ACC 9
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
#define HPAGE_SHIFT 20
@@ -90,11 +92,31 @@ typedef pte_t *pgtable_t;
#define pgprot_val(x) ((x).pgprot)
#define pgste_val(x) ((x).pgste)
-#define pte_val(x) ((x).pte)
-#define pmd_val(x) ((x).pmd)
-#define pud_val(x) ((x).pud)
-#define p4d_val(x) ((x).p4d)
-#define pgd_val(x) ((x).pgd)
+
+static inline unsigned long pte_val(pte_t pte)
+{
+ return pte.pte;
+}
+
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+ return pmd.pmd;
+}
+
+static inline unsigned long pud_val(pud_t pud)
+{
+ return pud.pud;
+}
+
+static inline unsigned long p4d_val(p4d_t p4d)
+{
+ return p4d.p4d;
+}
+
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+ return pgd.pgd;
+}
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
new file mode 100644
index 000000000000..1a8a6b15d121
--- /dev/null
+++ b/arch/s390/include/asm/pai.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Processor Activity Instrumentation support for cryptography counters
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef _ASM_S390_PAI_H
+#define _ASM_S390_PAI_H
+
+#include <linux/jump_label.h>
+#include <asm/lowcore.h>
+#include <asm/ptrace.h>
+
+struct qpaci_info_block {
+ u64 header;
+ struct {
+ u64 : 8;
+ u64 num_cc : 8; /* # of supported crypto counters */
+ u64 : 9;
+ u64 num_nnpa : 7; /* # of supported NNPA counters */
+ u64 : 32;
+ };
+};
+
+static inline int qpaci(struct qpaci_info_block *info)
+{
+ /* Size of info (in double words minus one) */
+ size_t size = sizeof(*info) / sizeof(u64) - 1;
+ int cc;
+
+ asm volatile(
+ " lgr 0,%[size]\n"
+ " .insn s,0xb28f0000,%[info]\n"
+ " lgr %[size],0\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc), [info] "=Q" (*info), [size] "+&d" (size)
+ :
+ : "0", "cc", "memory");
+ return cc ? (size + 1) * sizeof(u64) : 0;
+}
+
+#define PAI_CRYPTO_BASE 0x1000 /* First event number */
+#define PAI_CRYPTO_MAXCTR 256 /* Max # of event counters */
+#define PAI_CRYPTO_KERNEL_OFFSET 2048
+#define PAI_NNPA_BASE 0x1800 /* First event number */
+#define PAI_NNPA_MAXCTR 128 /* Max # of event counters */
+
+DECLARE_STATIC_KEY_FALSE(pai_key);
+
+static __always_inline void pai_kernel_enter(struct pt_regs *regs)
+{
+ if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+ return;
+ if (!static_branch_unlikely(&pai_key))
+ return;
+ if (!S390_lowcore.ccd)
+ return;
+ if (!user_mode(regs))
+ return;
+ WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+static __always_inline void pai_kernel_exit(struct pt_regs *regs)
+{
+ if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+ return;
+ if (!static_branch_unlikely(&pai_key))
+ return;
+ if (!S390_lowcore.ccd)
+ return;
+ if (!user_mode(regs))
+ return;
+ WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+}
+
+#endif
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 90824be5ce9a..108e732d7b14 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,9 +6,9 @@
#include <linux/mutex.h>
#include <linux/iommu.h>
#include <linux/pci_hotplug.h>
-#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
+#include <asm/pci_insn.h>
#include <asm/sclp.h>
#define PCIBIOS_MIN_IO 0x1000
@@ -97,6 +97,7 @@ struct zpci_bar_struct {
};
struct s390_domain;
+struct kvm_zdev;
#define ZPCI_FUNCTIONS_PER_BUS 256
struct zpci_bus {
@@ -116,18 +117,20 @@ struct zpci_bus {
struct zpci_dev {
struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
- struct list_head bus_next;
struct kref kref;
struct hotplug_slot hotplug_slot;
enum zpci_state state;
u32 fid; /* function ID, used by sclp */
u32 fh; /* function handle, used by insn's */
+ u32 gisa; /* GISA designation for passthrough */
u16 vfn; /* virtual function number */
u16 pchid; /* physical channel ID */
+ u16 maxstbl; /* Maximum store block size */
u8 pfgid; /* function group ID */
u8 pft; /* pci function type */
u8 port;
+ u8 dtsm; /* Supported DT mask */
u8 rid_available : 1;
u8 has_hp_slot : 1;
u8 has_resources : 1;
@@ -186,7 +189,10 @@ struct zpci_dev {
struct dentry *debugfs_dev;
+ /* IOMMU and passthrough */
struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+ struct kvm_zdev *kzdev;
+ struct mutex kzdev_lock;
};
static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -198,6 +204,9 @@ extern const struct attribute_group *zpci_attr_groups[];
extern unsigned int s390_pci_force_floating __initdata;
extern unsigned int s390_pci_no_rid;
+extern union zpci_sic_iib *zpci_aipb;
+extern struct airq_iv *zpci_aif_sbv;
+
/* -----------------------------------------------------------------------------
Prototypes
----------------------------------------------------------------------------- */
@@ -283,9 +292,6 @@ int zpci_dma_exit_device(struct zpci_dev *zdev);
int __init zpci_irq_init(void);
void __init zpci_irq_exit(void);
-int zpci_set_irq(struct zpci_dev *zdev);
-int zpci_clear_irq(struct zpci_dev *zdev);
-
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 1f4b666e85ee..d6189ed14f84 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -153,9 +153,11 @@ struct clp_rsp_query_pci_grp {
u8 : 6;
u8 frame : 1;
u8 refresh : 1; /* TLB refresh mode */
- u16 reserved2;
+ u16 : 3;
+ u16 maxstbl : 13; /* Maximum store block size */
u16 mui;
- u16 : 16;
+ u8 dtsm; /* Supported DT mask */
+ u8 reserved3;
u16 maxfaal;
u16 : 4;
u16 dnoi : 12;
@@ -173,7 +175,8 @@ struct clp_req_set_pci {
u16 reserved2;
u8 oc; /* operation controls */
u8 ndas; /* number of dma spaces */
- u64 reserved3;
+ u32 reserved3;
+ u32 gisa; /* GISA designation */
} __packed;
/* Set PCI function response */
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
index 5dfe47588277..3bb4e7e33a0e 100644
--- a/arch/s390/include/asm/pci_debug.h
+++ b/arch/s390/include/asm/pci_debug.h
@@ -17,9 +17,14 @@ extern debug_info_t *pci_debug_err_id;
debug_text_event(pci_debug_err_id, 0, debug_buffer); \
} while (0)
+static inline void zpci_err_hex_level(int level, void *addr, int len)
+{
+ debug_event(pci_debug_err_id, level, addr, len);
+}
+
static inline void zpci_err_hex(void *addr, int len)
{
- debug_event(pci_debug_err_id, 0, addr, len);
+ zpci_err_hex_level(0, addr, len);
}
#endif
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 3b8e89d4578a..91e63426bdc5 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -97,23 +97,23 @@ static inline unsigned int calc_px(dma_addr_t ptr)
return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
}
-static inline void set_pt_pfaa(unsigned long *entry, void *pfaa)
+static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
{
*entry &= ZPCI_PTE_FLAG_MASK;
- *entry |= ((unsigned long) pfaa & ZPCI_PTE_ADDR_MASK);
+ *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
}
-static inline void set_rt_sto(unsigned long *entry, void *sto)
+static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
{
*entry &= ZPCI_RTE_FLAG_MASK;
- *entry |= ((unsigned long) sto & ZPCI_RTE_ADDR_MASK);
+ *entry |= (sto & ZPCI_RTE_ADDR_MASK);
*entry |= ZPCI_TABLE_TYPE_RTX;
}
-static inline void set_st_pto(unsigned long *entry, void *pto)
+static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
{
*entry &= ZPCI_STE_FLAG_MASK;
- *entry |= ((unsigned long) pto & ZPCI_STE_ADDR_MASK);
+ *entry |= (pto & ZPCI_STE_ADDR_MASK);
*entry |= ZPCI_TABLE_TYPE_SX;
}
@@ -169,16 +169,19 @@ static inline int pt_entry_isvalid(unsigned long entry)
static inline unsigned long *get_rt_sto(unsigned long entry)
{
- return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
- ? (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK)
- : NULL;
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+ return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+ else
+ return NULL;
+
}
static inline unsigned long *get_st_pto(unsigned long entry)
{
- return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
- ? (unsigned long *) (entry & ZPCI_STE_ADDR_MASK)
- : NULL;
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+ return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
+ else
+ return NULL;
}
/* Prototypes */
@@ -186,7 +189,7 @@ void dma_free_seg_table(unsigned long);
unsigned long *dma_alloc_cpu_table(void);
void dma_cleanup_tables(unsigned long *);
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
-void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
+void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
extern const struct dma_map_ops s390_pci_dma_ops;
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index 61cf9531f68f..e5f57cfe1d45 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -98,6 +98,15 @@ struct zpci_fib {
u32 gd;
} __packed __aligned(8);
+/* Set Interruption Controls Operation Controls */
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+#define SIC_SET_AENI_CONTROLS 2
+#define SIC_IRQ_MODE_DIRECT 4
+#define SIC_IRQ_MODE_D_ALL 16
+#define SIC_IRQ_MODE_D_SINGLE 17
+#define SIC_IRQ_MODE_SET_CPU 18
+
/* directed interruption information block */
struct zpci_diib {
u32 : 1;
@@ -119,9 +128,20 @@ struct zpci_cdiib {
u64 : 64;
} __packed __aligned(8);
+/* adapter interruption parameters block */
+struct zpci_aipb {
+ u64 faisb;
+ u64 gait;
+ u16 : 13;
+ u16 afi : 3;
+ u32 : 32;
+ u16 faal;
+} __packed __aligned(8);
+
union zpci_sic_iib {
struct zpci_diib diib;
struct zpci_cdiib cdiib;
+ struct zpci_aipb aipb;
};
DECLARE_STATIC_KEY_FALSE(have_mio);
@@ -134,13 +154,6 @@ int __zpci_store(u64 data, u64 req, u64 offset);
int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
int __zpci_store_block(const u64 *data, u64 req, u64 offset);
void zpci_barrier(void);
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
-
-static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
-{
- union zpci_sic_iib iib = {{0}};
-
- return __zpci_set_irq_ctrl(ctl, isc, &iib);
-}
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
#endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f14a555eff74..17eb618f1348 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -103,17 +103,17 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
- pgd_val(*pgd) = _REGION1_ENTRY | __pa(p4d);
+ set_pgd(pgd, __pgd(_REGION1_ENTRY | __pa(p4d)));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
- p4d_val(*p4d) = _REGION2_ENTRY | __pa(pud);
+ set_p4d(p4d, __p4d(_REGION2_ENTRY | __pa(pud)));
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
- pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
+ set_pud(pud, __pud(_REGION3_ENTRY | __pa(pmd)));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -129,7 +129,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline void pmd_populate(struct mm_struct *mm,
pmd_t *pmd, pgtable_t pte)
{
- pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
+ set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte)));
}
#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 008a6c856fa4..f1cb9391190d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -181,6 +181,8 @@ static inline int is_module_addr(void *addr)
#define _PAGE_SOFT_DIRTY 0x000
#endif
+#define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE /* SW pte exclusive swap bit */
+
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
_PAGE_YOUNG | _PAGE_SOFT_DIRTY)
@@ -422,23 +424,6 @@ static inline int is_module_addr(void *addr)
* implies read permission.
*/
/*xwr*/
-#define __P000 PAGE_NONE
-#define __P001 PAGE_RO
-#define __P010 PAGE_RO
-#define __P011 PAGE_RO
-#define __P100 PAGE_RX
-#define __P101 PAGE_RX
-#define __P110 PAGE_RX
-#define __P111 PAGE_RX
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_RO
-#define __S010 PAGE_RW
-#define __S011 PAGE_RW
-#define __S100 PAGE_RX
-#define __S101 PAGE_RX
-#define __S110 PAGE_RWX
-#define __S111 PAGE_RWX
/*
* Segment entry (large page) protection definitions.
@@ -523,7 +508,7 @@ static inline int mm_has_pgste(struct mm_struct *mm)
static inline int mm_is_protected(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
- if (unlikely(atomic_read(&mm->context.is_protected)))
+ if (unlikely(atomic_read(&mm->context.protected_count)))
return 1;
#endif
return 0;
@@ -538,6 +523,36 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
return 0;
}
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+{
+ return __pte(pte_val(pte) & ~pgprot_val(prot));
+}
+
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
+{
+ return __pte(pte_val(pte) | pgprot_val(prot));
+}
+
+static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+ return __pmd(pmd_val(pmd) & ~pgprot_val(prot));
+}
+
+static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+ return __pmd(pmd_val(pmd) | pgprot_val(prot));
+}
+
+static inline pud_t clear_pud_bit(pud_t pud, pgprot_t prot)
+{
+ return __pud(pud_val(pud) & ~pgprot_val(prot));
+}
+
+static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
+{
+ return __pud(pud_val(pud) | pgprot_val(prot));
+}
+
/*
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
@@ -570,7 +585,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
unsigned long address = (unsigned long)ptr | 1;
asm volatile(
- " .insn rre,0xb98a0000,%[r1],%[address]"
+ " cspg %[r1],%[address]"
: [r1] "+&d" (r1.pair), "+m" (*ptr)
: [address] "d" (address)
: "cc");
@@ -796,6 +811,22 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif
+#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE
+static inline int pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return set_pte_bit(pte, __pgprot(_PAGE_SWP_EXCLUSIVE));
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return clear_pte_bit(pte, __pgprot(_PAGE_SWP_EXCLUSIVE));
+}
+
static inline int pte_soft_dirty(pte_t pte)
{
return pte_val(pte) & _PAGE_SOFT_DIRTY;
@@ -804,15 +835,13 @@ static inline int pte_soft_dirty(pte_t pte)
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
- pte_val(pte) |= _PAGE_SOFT_DIRTY;
- return pte;
+ return set_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY));
}
#define pte_swp_mksoft_dirty pte_mksoft_dirty
static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_SOFT_DIRTY;
- return pte;
+ return clear_pte_bit(pte, __pgprot(_PAGE_SOFT_DIRTY));
}
#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
@@ -823,14 +852,12 @@ static inline int pmd_soft_dirty(pmd_t pmd)
static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY;
- return pmd;
+ return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
}
static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
{
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY;
- return pmd;
+ return clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_SOFT_DIRTY));
}
/*
@@ -881,32 +908,57 @@ static inline pgprot_t pte_pgprot(pte_t pte)
* pgd/pmd/pte modification functions
*/
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ WRITE_ONCE(*pgdp, pgd);
+}
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ WRITE_ONCE(*p4dp, p4d);
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+ WRITE_ONCE(*pudp, pud);
+}
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ WRITE_ONCE(*pmdp, pmd);
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+ WRITE_ONCE(*ptep, pte);
+}
+
static inline void pgd_clear(pgd_t *pgd)
{
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
- pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
+ set_pgd(pgd, __pgd(_REGION1_ENTRY_EMPTY));
}
static inline void p4d_clear(p4d_t *p4d)
{
if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
+ set_p4d(p4d, __p4d(_REGION2_ENTRY_EMPTY));
}
static inline void pud_clear(pud_t *pud)
{
if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+ set_pud(pud, __pud(_REGION3_ENTRY_EMPTY));
}
static inline void pmd_clear(pmd_t *pmdp)
{
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- pte_val(*ptep) = _PAGE_INVALID;
+ set_pte(ptep, __pte(_PAGE_INVALID));
}
/*
@@ -915,79 +967,74 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
*/
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- pte_val(pte) &= _PAGE_CHG_MASK;
- pte_val(pte) |= pgprot_val(newprot);
+ pte = clear_pte_bit(pte, __pgprot(~_PAGE_CHG_MASK));
+ pte = set_pte_bit(pte, newprot);
/*
* newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX
* has the invalid bit set, clear it again for readable, young pages
*/
if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
- pte_val(pte) &= ~_PAGE_INVALID;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID));
/*
* newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page
* protection bit set, clear it again for writable, dirty pages
*/
if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
- pte_val(pte) &= ~_PAGE_PROTECT;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT));
return pte;
}
static inline pte_t pte_wrprotect(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_WRITE;
- pte_val(pte) |= _PAGE_PROTECT;
- return pte;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_WRITE));
+ return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
}
static inline pte_t pte_mkwrite(pte_t pte)
{
- pte_val(pte) |= _PAGE_WRITE;
+ pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE));
if (pte_val(pte) & _PAGE_DIRTY)
- pte_val(pte) &= ~_PAGE_PROTECT;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT));
return pte;
}
static inline pte_t pte_mkclean(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_DIRTY;
- pte_val(pte) |= _PAGE_PROTECT;
- return pte;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_DIRTY));
+ return set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY;
+ pte = set_pte_bit(pte, __pgprot(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
if (pte_val(pte) & _PAGE_WRITE)
- pte_val(pte) &= ~_PAGE_PROTECT;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_PROTECT));
return pte;
}
static inline pte_t pte_mkold(pte_t pte)
{
- pte_val(pte) &= ~_PAGE_YOUNG;
- pte_val(pte) |= _PAGE_INVALID;
- return pte;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_YOUNG));
+ return set_pte_bit(pte, __pgprot(_PAGE_INVALID));
}
static inline pte_t pte_mkyoung(pte_t pte)
{
- pte_val(pte) |= _PAGE_YOUNG;
+ pte = set_pte_bit(pte, __pgprot(_PAGE_YOUNG));
if (pte_val(pte) & _PAGE_READ)
- pte_val(pte) &= ~_PAGE_INVALID;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_INVALID));
return pte;
}
static inline pte_t pte_mkspecial(pte_t pte)
{
- pte_val(pte) |= _PAGE_SPECIAL;
- return pte;
+ return set_pte_bit(pte, __pgprot(_PAGE_SPECIAL));
}
#ifdef CONFIG_HUGETLB_PAGE
static inline pte_t pte_mkhuge(pte_t pte)
{
- pte_val(pte) |= _PAGE_LARGE;
- return pte;
+ return set_pte_bit(pte, __pgprot(_PAGE_LARGE));
}
#endif
@@ -1006,7 +1053,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
if (__builtin_constant_p(opt) && opt == 0) {
/* Invalidation + TLB flush for the pte */
asm volatile(
- " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+ " ipte %[r1],%[r2],0,%[m4]"
: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
[m4] "i" (local));
return;
@@ -1015,7 +1062,7 @@ static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
/* Invalidate ptes with options + TLB flush of the ptes */
opt = opt | (asce & _ASCE_ORIGIN);
asm volatile(
- " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+ " ipte %[r1],%[r2],%[r3],%[m4]"
: [r2] "+a" (address), [r3] "+a" (opt)
: [r1] "a" (pto), [m4] "i" (local) : "memory");
}
@@ -1028,7 +1075,7 @@ static __always_inline void __ptep_ipte_range(unsigned long address, int nr,
/* Invalidate a range of ptes + TLB flush of the ptes */
do {
asm volatile(
- " .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+ " ipte %[r1],%[r2],%[r3],%[m4]"
: [r2] "+a" (address), [r3] "+a" (nr)
: [r1] "a" (pto), [m4] "i" (local) : "memory");
} while (nr != 255);
@@ -1114,13 +1161,26 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
if (full) {
res = *ptep;
- *ptep = __pte(_PAGE_INVALID);
+ set_pte(ptep, __pte(_PAGE_INVALID));
} else {
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- /* At this point the reference through the mapping is still present */
- if (mm_is_protected(mm) && pte_present(res))
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ /* Nothing to do */
+ if (!mm_is_protected(mm) || !pte_present(res))
+ return res;
+ /*
+ * At this point the reference through the mapping is still present.
+ * The notifier should have destroyed all protected vCPUs at this
+ * point, so the destroy should be successful.
+ */
+ if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
+ return res;
+ /*
+ * If something went wrong and the page could not be destroyed, or
+ * if this is not a mm teardown, the slower export is used as
+ * fallback instead.
+ */
+ uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
return res;
}
@@ -1198,11 +1258,11 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
if (pte_present(entry))
- pte_val(entry) &= ~_PAGE_UNUSED;
+ entry = clear_pte_bit(entry, __pgprot(_PAGE_UNUSED));
if (mm_has_pgste(mm))
ptep_set_pte_at(mm, addr, ptep, entry);
else
- *ptep = entry;
+ set_pte(ptep, entry);
}
/*
@@ -1213,9 +1273,9 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
- pte_val(__pte) = physpage | pgprot_val(pgprot);
+ __pte = __pte(physpage | pgprot_val(pgprot));
if (!MACHINE_HAS_NX)
- pte_val(__pte) &= ~_PAGE_NOEXEC;
+ __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC));
return pte_mkyoung(__pte);
}
@@ -1355,61 +1415,57 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- return pmd;
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
+ return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
}
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE));
if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
return pmd;
}
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- return pmd;
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY));
+ return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY;
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY));
if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
return pmd;
}
static inline pud_t pud_wrprotect(pud_t pud)
{
- pud_val(pud) &= ~_REGION3_ENTRY_WRITE;
- pud_val(pud) |= _REGION_ENTRY_PROTECT;
- return pud;
+ pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE));
+ return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
}
static inline pud_t pud_mkwrite(pud_t pud)
{
- pud_val(pud) |= _REGION3_ENTRY_WRITE;
+ pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_WRITE));
if (pud_val(pud) & _REGION3_ENTRY_DIRTY)
- pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+ pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
return pud;
}
static inline pud_t pud_mkclean(pud_t pud)
{
- pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
- pud_val(pud) |= _REGION_ENTRY_PROTECT;
- return pud;
+ pud = clear_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY));
+ return set_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY;
+ pud = set_pud_bit(pud, __pgprot(_REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY));
if (pud_val(pud) & _REGION3_ENTRY_WRITE)
- pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+ pud = clear_pud_bit(pud, __pgprot(_REGION_ENTRY_PROTECT));
return pud;
}
@@ -1433,37 +1489,39 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG));
if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- return pmd;
+ pmd = clear_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG));
+ return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
- _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
- _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
- pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ unsigned long mask;
+
+ mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ mask |= _SEGMENT_ENTRY_DIRTY;
+ mask |= _SEGMENT_ENTRY_YOUNG;
+ mask |= _SEGMENT_ENTRY_LARGE;
+ mask |= _SEGMENT_ENTRY_SOFT_DIRTY;
+ pmd = __pmd(pmd_val(pmd) & mask);
+ pmd = set_pmd_bit(pmd, __pgprot(massage_pgprot_pmd(newprot)));
if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_INVALID));
return pmd;
}
static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
- pmd_t __pmd;
- pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return __pmd;
+ return __pmd(physpage + massage_pgprot_pmd(pgprot));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
@@ -1491,7 +1549,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
if (__builtin_constant_p(opt) && opt == 0) {
/* flush without guest asce */
asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ " idte %[r1],0,%[r2],%[m4]"
: "+m" (*pmdp)
: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK)),
[m4] "i" (local)
@@ -1499,7 +1557,7 @@ static __always_inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
} else {
/* flush with guest asce */
asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ " idte %[r1],%[r3],%[r2],%[m4]"
: "+m" (*pmdp)
: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
[r3] "a" (asce), [m4] "i" (local)
@@ -1518,7 +1576,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp,
if (__builtin_constant_p(opt) && opt == 0) {
/* flush without guest asce */
asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+ " idte %[r1],0,%[r2],%[m4]"
: "+m" (*pudp)
: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK)),
[m4] "i" (local)
@@ -1526,7 +1584,7 @@ static __always_inline void __pudp_idte(unsigned long addr, pud_t *pudp,
} else {
/* flush with guest asce */
asm volatile(
- " .insn rrf,0xb98e0000,%[r1],%[r2],%[r3],%[m4]"
+ " idte %[r1],%[r3],%[r2],%[m4]"
: "+m" (*pudp)
: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
[r3] "a" (asce), [m4] "i" (local)
@@ -1585,16 +1643,15 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
if (!MACHINE_HAS_NX)
- pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC;
- *pmdp = entry;
+ entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
+ set_pmd(pmdp, entry);
}
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- return pmd;
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_LARGE));
+ pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_YOUNG));
+ return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT));
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -1611,7 +1668,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
{
if (full) {
pmd_t pmd = *pmdp;
- *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
+ set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
return pmd;
}
return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
@@ -1669,18 +1726,18 @@ static inline int has_transparent_hugepage(void)
/*
* 64 bit swap entry format:
* A page-table entry has some bits we have to treat in a special way.
- * Bits 52 and bit 55 have to be zero, otherwise a specification
- * exception will occur instead of a page translation exception. The
- * specification exception has the bad habit not to store necessary
- * information in the lowcore.
- * Bits 54 and 63 are used to indicate the page type.
+ * Bits 54 and 63 are used to indicate the page type. Bit 53 marks the pte
+ * as invalid.
* A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
- * This leaves the bits 0-51 and bits 56-62 to store type and offset.
- * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
- * for the offset.
- * | offset |01100|type |00|
+ * | offset |E11XX|type |S0|
* |0000000000111111111122222222223333333333444444444455|55555|55566|66|
* |0123456789012345678901234567890123456789012345678901|23456|78901|23|
+ *
+ * Bits 0-51 store the offset.
+ * Bit 52 (E) is used to remember PG_anon_exclusive.
+ * Bits 57-61 store the type.
+ * Bit 62 (S) is used for softdirty tracking.
+ * Bits 55 and 56 (X) are unused.
*/
#define __SWP_OFFSET_MASK ((1UL << 52) - 1)
@@ -1690,12 +1747,12 @@ static inline int has_transparent_hugepage(void)
static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
{
- pte_t pte;
+ unsigned long pteval;
- pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
- pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
- pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
- return pte;
+ pteval = _PAGE_INVALID | _PAGE_PROTECT;
+ pteval |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
+ pteval |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
+ return __pte(pteval);
}
static inline unsigned long __swp_type(swp_entry_t entry)
@@ -1720,6 +1777,10 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
+extern void vmem_unmap_4k_page(unsigned long addr);
+extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index d9d5350cc3ec..bf15da0fedbc 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void)
static inline void __preempt_count_add(int val)
{
- if (__builtin_constant_p(val) && (val >= -128) && (val <= 127))
- __atomic_add_const(val, &S390_lowcore.preempt_count);
- else
- __atomic_add(val, &S390_lowcore.preempt_count);
+ /*
+ * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
+ * enabled, gcc 12 fails to handle __builtin_constant_p().
+ */
+ if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
+ if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
+ __atomic_add_const(val, &S390_lowcore.preempt_count);
+ return;
+ }
+ }
+ __atomic_add(val, &S390_lowcore.preempt_count);
}
static inline void __preempt_count_sub(int val)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index f54c152bf2bf..87be3e855bf7 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -83,21 +83,22 @@ void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern void execve_tail(void);
extern void __bpon(void);
+unsigned long vdso_size(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
*/
-#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
+#define TASK_SIZE (test_thread_flag(TIF_31BIT) ? \
_REGION3_SIZE : TASK_SIZE_MAX)
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
-#define TASK_SIZE TASK_SIZE_OF(current)
#define TASK_SIZE_MAX (-PAGE_SIZE)
-#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
- _REGION3_SIZE : _REGION2_SIZE)
-#define STACK_TOP_MAX _REGION2_SIZE
+#define VDSO_BASE (STACK_TOP + PAGE_SIZE)
+#define VDSO_LIMIT (test_thread_flag(TIF_31BIT) ? _REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP (VDSO_LIMIT - vdso_size() - PAGE_SIZE)
+#define STACK_TOP_MAX (_REGION2_SIZE - vdso_size() - PAGE_SIZE)
#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -185,9 +186,6 @@ struct pt_regs;
void show_registers(struct pt_regs *regs);
void show_cacheinfo(struct seq_file *m);
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *tsk) { }
-
/* Free guarded storage control block */
void guarded_storage_release(struct task_struct *tsk);
void gs_load_bc_cb(struct pt_regs *regs);
@@ -201,13 +199,7 @@ unsigned long __get_wchan(struct task_struct *p);
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
-static __always_inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-
- asm volatile("la %0,0(15)" : "=a" (sp));
- return sp;
-}
+register unsigned long current_stack_pointer asm("r15");
static __always_inline unsigned short stap(void)
{
@@ -226,8 +218,7 @@ static inline unsigned long __ecag(unsigned int asi, unsigned char parm)
{
unsigned long val;
- asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
- : "=d" (val) : "a" (asi << 8 | parm));
+ asm volatile("ecag %0,0,0(%1)" : "=d" (val) : "a" (asi << 8 | parm));
return val;
}
@@ -310,24 +301,8 @@ static __always_inline void __noreturn disabled_wait(void)
while (1);
}
-/*
- * Basic Program Check Handler.
- */
-extern void s390_base_pgm_handler(void);
-extern void (*s390_base_pgm_handler_fn)(void);
-
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
-extern int memcpy_real(void *, void *, size_t);
-extern void memcpy_absolute(void *, void *, size_t);
-
-#define mem_assign_absolute(dest, val) do { \
- __typeof__(dest) __tmp = (val); \
- \
- BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \
- memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
-} while (0)
-
extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 4ffa8e7f0ed3..8bae33ab320a 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -15,11 +15,13 @@
#define PIF_EXECVE_PGSTE_RESTART 1 /* restart execve for PGSTE binaries */
#define PIF_SYSCALL_RET_SET 2 /* return value was set via ptrace */
#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */
+#define PIF_FTRACE_FULL_REGS 4 /* all register contents valid (ftrace) */
#define _PIF_SYSCALL BIT(PIF_SYSCALL)
#define _PIF_EXECVE_PGSTE_RESTART BIT(PIF_EXECVE_PGSTE_RESTART)
#define _PIF_SYSCALL_RET_SET BIT(PIF_SYSCALL_RET_SET)
#define _PIF_GUEST_FAULT BIT(PIF_GUEST_FAULT)
+#define _PIF_FTRACE_FULL_REGS BIT(PIF_FTRACE_FULL_REGS)
#ifndef __ASSEMBLY__
@@ -69,6 +71,35 @@ enum {
&(*(struct psw_bits *)(&(__psw))); \
}))
+#define PSW32_MASK_PER 0x40000000UL
+#define PSW32_MASK_DAT 0x04000000UL
+#define PSW32_MASK_IO 0x02000000UL
+#define PSW32_MASK_EXT 0x01000000UL
+#define PSW32_MASK_KEY 0x00F00000UL
+#define PSW32_MASK_BASE 0x00080000UL /* Always one */
+#define PSW32_MASK_MCHECK 0x00040000UL
+#define PSW32_MASK_WAIT 0x00020000UL
+#define PSW32_MASK_PSTATE 0x00010000UL
+#define PSW32_MASK_ASC 0x0000C000UL
+#define PSW32_MASK_CC 0x00003000UL
+#define PSW32_MASK_PM 0x00000f00UL
+#define PSW32_MASK_RI 0x00000080UL
+
+#define PSW32_ADDR_AMODE 0x80000000UL
+#define PSW32_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW32_DEFAULT_KEY (((u32)PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY 0x00000000UL
+#define PSW32_ASC_ACCREG 0x00004000UL
+#define PSW32_ASC_SECONDARY 0x00008000UL
+#define PSW32_ASC_HOME 0x0000C000UL
+
+typedef struct {
+ unsigned int mask;
+ unsigned int addr;
+} psw_t32 __aligned(8);
+
#define PGM_INT_CODE_MASK 0x7f
#define PGM_INT_CODE_PER 0x80
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 4b9b14b20984..2f983e0b95e0 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -18,7 +18,6 @@
#define QDIO_MAX_BUFFERS_MASK (QDIO_MAX_BUFFERS_PER_Q - 1)
#define QDIO_BUFNR(num) ((num) & QDIO_MAX_BUFFERS_MASK)
#define QDIO_MAX_ELEMENTS_PER_BUFFER 16
-#define QDIO_SBAL_SIZE 256
#define QDIO_QETH_QFMT 0
#define QDIO_ZFCP_QFMT 1
@@ -92,8 +91,8 @@ struct qdr {
* @pfmt: implementation dependent parameter format
* @rflags: QEBSM
* @ac: adapter characteristics
- * @isliba: absolute address of first input SLIB
- * @osliba: absolute address of first output SLIB
+ * @isliba: logical address of first input SLIB
+ * @osliba: logical address of first output SLIB
* @ebcnam: adapter identifier in EBCDIC
* @parm: implementation dependent parameters
*/
@@ -134,9 +133,9 @@ struct slibe {
* @sb_count: number of storage blocks
* @sba: storage block element addresses
* @dcount: size of storage block elements
- * @user0: user defineable value
- * @res4: reserved paramater
- * @user1: user defineable value
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
*/
struct qaob {
u64 res0[6];
@@ -313,7 +312,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
* @qib_rflags: rflags to set
* @no_input_qs: number of input queues
* @no_output_qs: number of output queues
- * @input_handler: handler to be called for input queues
+ * @input_handler: handler to be called for input queues, and device-wide errors
* @output_handler: handler to be called for output queues
* @irq_poll: Data IRQ polling handler
* @scan_threshold: # of in-use buffers that triggers scan on output queue
@@ -337,9 +336,6 @@ struct qdio_initialize {
struct qdio_buffer ***output_sbal_addr_array;
};
-#define QDIO_FLAG_SYNC_INPUT 0x01
-#define QDIO_FLAG_SYNC_OUTPUT 0x02
-
int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
@@ -349,13 +345,18 @@ extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
extern int qdio_establish(struct ccw_device *cdev,
struct qdio_initialize *init_data);
extern int qdio_activate(struct ccw_device *);
-extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, int q_nr,
- unsigned int bufnr, unsigned int count, struct qaob *aob);
extern int qdio_start_irq(struct ccw_device *cdev);
extern int qdio_stop_irq(struct ccw_device *cdev);
-extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
- bool is_input, unsigned int *bufnr,
- unsigned int *error);
+extern int qdio_inspect_input_queue(struct ccw_device *cdev, unsigned int nr,
+ unsigned int *bufnr, unsigned int *error);
+extern int qdio_inspect_output_queue(struct ccw_device *cdev, unsigned int nr,
+ unsigned int *bufnr, unsigned int *error);
+extern int qdio_add_bufs_to_input_queue(struct ccw_device *cdev,
+ unsigned int q_nr, unsigned int bufnr,
+ unsigned int count);
+extern int qdio_add_bufs_to_output_queue(struct ccw_device *cdev,
+ unsigned int q_nr, unsigned int bufnr,
+ unsigned int count, struct qaob *aob);
extern int qdio_shutdown(struct ccw_device *, int);
extern int qdio_free(struct ccw_device *);
extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index c68ea35de498..9d4c7f71e070 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright IBM Corp. 2007
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#ifndef _ASM_S390_SCLP_H
@@ -18,6 +17,7 @@
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
#ifndef __ASSEMBLY__
+#include <linux/uio.h>
#include <asm/chpid.h>
#include <asm/cpu.h>
@@ -88,6 +88,11 @@ struct sclp_info {
unsigned char has_diag318 : 1;
unsigned char has_sipl : 1;
unsigned char has_dirq : 1;
+ unsigned char has_iplcc : 1;
+ unsigned char has_zpci_lsi : 1;
+ unsigned char has_aisii : 1;
+ unsigned char has_aeni : 1;
+ unsigned char has_aisi : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
@@ -112,7 +117,7 @@ struct zpci_report_error_header {
* (OpenCrypto Successful Diagnostics Execution)
*/
u16 length; /* Length of Subsequent Data (up to 4K – SCLP header */
- u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */
+ u8 data[]; /* Subsequent Data passed verbatim to SCLP ET 24 */
} __packed;
extern char *sclp_early_sccb;
@@ -142,8 +147,7 @@ int sclp_pci_deconfigure(u32 fid);
int sclp_ap_configure(u32 apid);
int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
-int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
void sclp_ocf_cpc_name_copy(char *dst);
static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index a7c3ccf681da..322bdcd4b616 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -215,6 +215,11 @@ union scsw {
#define SNS2_ENV_DATA_PRESENT 0x10
#define SNS2_INPRECISE_END 0x04
+/*
+ * architectured values for PPRC errors
+ */
+#define SNS7_INVALID_ON_SEC 0x0e
+
/**
* scsw_is_tm - check for transport mode scsw
* @scsw: pointer to scsw
@@ -508,9 +513,21 @@ static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
*/
static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
{
- return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
- !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
- (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
+ /* Must be status pending. */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Must have alert status. */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS))
+ return 0;
+
+ /* Must be alone or together with primary, secondary or both,
+ * => no intermediate status.
+ */
+ if (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS)
+ return 0;
+
+ return 1;
}
/**
@@ -522,10 +539,25 @@ static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
*/
static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
{
- return (scsw->cmd.fctl != 0) &&
- (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
- (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) ||
- (scsw->cmd.actl & SCSW_ACTL_SUSPENDED));
+ /* Must indicate at least one I/O function. */
+ if (!scsw->cmd.fctl)
+ return 0;
+
+ /* Must be status pending. */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Can be status pending alone, or with any combination of primary,
+ * secondary and alert => no intermediate status.
+ */
+ if (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS))
+ return 1;
+
+ /* If intermediate, must be suspended. */
+ if (scsw->cmd.actl & SCSW_ACTL_SUSPENDED)
+ return 1;
+
+ return 0;
}
/**
@@ -675,9 +707,21 @@ static inline int scsw_tm_is_valid_q(union scsw *scsw)
*/
static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
{
- return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
- !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
- (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
+ /* Must be status pending. */
+ if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Must have alert status. */
+ if (!(scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS))
+ return 0;
+
+ /* Must be alone or together with primary, secondary or both,
+ * => no intermediate status.
+ */
+ if (scsw->tm.stctl & SCSW_STCTL_INTER_STATUS)
+ return 0;
+
+ return 1;
}
/**
@@ -689,11 +733,25 @@ static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
*/
static inline int scsw_tm_is_valid_pno(union scsw *scsw)
{
- return (scsw->tm.fctl != 0) &&
- (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
- (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) ||
- ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
- (scsw->tm.actl & SCSW_ACTL_SUSPENDED)));
+ /* Must indicate at least one I/O function. */
+ if (!scsw->tm.fctl)
+ return 0;
+
+ /* Must be status pending. */
+ if (!(scsw->tm.stctl & SCSW_STCTL_STATUS_PEND))
+ return 0;
+
+ /* Can be status pending alone, or with any combination of primary,
+ * secondary and alert => no intermediate status.
+ */
+ if (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS))
+ return 1;
+
+ /* If intermediate, must be suspended. */
+ if (scsw->tm.actl & SCSW_ACTL_SUSPENDED)
+ return 1;
+
+ return 0;
}
/**
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index f16f4d054ae2..73ed2781073b 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -3,7 +3,6 @@
* Copyright IBM Corp. 1999, 2012
* Author(s): Denis Joseph Barrow,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
@@ -31,7 +30,8 @@ extern void smp_emergency_stop(void);
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
-extern void smp_save_dump_cpus(void);
+extern void smp_save_dump_ipl_cpu(void);
+extern void smp_save_dump_secondary_cpus(void);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
@@ -59,6 +59,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
{
}
+extern int smp_reinit_ipl_cpu(void);
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
index fd17f25704bd..1ac5115d3115 100644
--- a/arch/s390/include/asm/softirq_stack.h
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -5,9 +5,10 @@
#include <asm/lowcore.h>
#include <asm/stacktrace.h>
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static inline void do_softirq_own_stack(void)
{
call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
}
-
+#endif
#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 888a2f1c9ee3..37127cd7749e 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -77,8 +77,9 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
+ kcsan_release();
asm_inline volatile(
- ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
" sth %1,%0\n"
: "=R" (((unsigned short *) &lp->lock)[1])
: "d" (0) : "cc", "memory");
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index dd00d98804ec..b23c658dce77 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -36,25 +36,24 @@ static inline bool on_stack(struct stack_info *info,
/*
* Stack layout of a C stack frame.
+ * Kernel uses the packed stack layout (-mpacked-stack).
*/
-#ifndef __PACK_STACK
struct stack_frame {
- unsigned long back_chain;
- unsigned long empty1[5];
- unsigned long gprs[10];
- unsigned int empty2[8];
-};
-#else
-struct stack_frame {
- unsigned long empty1[5];
- unsigned int empty2[8];
+ union {
+ unsigned long empty[9];
+ struct {
+ unsigned long sie_control_block;
+ unsigned long sie_savearea;
+ unsigned long sie_reason;
+ unsigned long sie_flags;
+ };
+ };
unsigned long gprs[10];
unsigned long back_chain;
};
-#endif
/*
- * Unlike current_stack_pointer() which simply returns current value of %r15
+ * Unlike current_stack_pointer which simply contains the current value of %r15
* current_frame_address() returns function stack frame address, which matches
* %r15 upon function invocation. It may differ from %r15 later if function
* allocates stack for local variables or new stack frame to call other
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index ba07463897c1..4d74d7e33340 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -44,8 +44,8 @@ struct stp_sstpi {
u32 : 32;
u32 ctnid[3];
u32 : 32;
- u32 todoff[4];
- u32 rsvd[48];
+ u64 todoff;
+ u32 rsvd[50];
} __packed;
struct stp_tzib {
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index ad2c996e7e93..fde7e6b1df48 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -162,4 +162,4 @@
__diag_pop(); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
-#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
+#endif /* _ASM_S390_SYSCALL_WRAPPER_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index fe7b3f8f0791..ab1c6316055c 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -67,12 +67,12 @@ struct sysinfo_1_2_2 {
unsigned short cpus_configured;
unsigned short cpus_standby;
unsigned short cpus_reserved;
- unsigned short adjustment[0];
+ unsigned short adjustment[];
};
struct sysinfo_1_2_2_extension {
unsigned int alt_capability;
- unsigned short alt_adjustment[0];
+ unsigned short alt_adjustment[];
};
struct sysinfo_2_2_1 {
@@ -181,7 +181,7 @@ struct sysinfo_15_1_x {
unsigned char reserved1;
unsigned char mnest;
unsigned char reserved2[4];
- union topology_entry tle[0];
+ union topology_entry tle[];
};
int stsi(void *sysinfo, int fc, int sel1, int sel2);
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
deleted file mode 100644
index 46fa3020b41e..000000000000
--- a/arch/s390/include/asm/termios.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/termios.h"
- */
-#ifndef _S390_TERMIOS_H
-#define _S390_TERMIOS_H
-
-#include <uapi/asm/termios.h>
-
-
-/* intr=^C quit=^\ erase=del kill=^U
- eof=^D vtime=\0 vmin=\1 sxtc=\0
- start=^Q stop=^S susp=^Z eol=\0
- reprint=^R discard=^U werase=^W lnext=^V
- eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
-#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
-
-#include <asm-generic/termios-base.h>
-
-#endif /* _S390_TERMIOS_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 50d9b04ecbd1..ce878e85b6e4 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -148,7 +148,7 @@ struct ptff_qui {
asm volatile( \
" lgr 0,%[reg0]\n" \
" lgr 1,%[reg1]\n" \
- " .insn e,0x0104\n" \
+ " ptff\n" \
" ipm %[rc]\n" \
" srl %[rc],28\n" \
: [rc] "=&d" (rc), "+m" (*(struct addrtype *)reg1) \
@@ -187,20 +187,17 @@ static inline unsigned long get_tod_clock(void)
static inline unsigned long get_tod_clock_fast(void)
{
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
unsigned long clk;
asm volatile("stckf %0" : "=Q" (clk) : : "cc");
return clk;
-#else
- return get_tod_clock();
-#endif
}
static inline cycles_t get_cycles(void)
{
return (cycles_t) get_tod_clock() >> 2;
}
+#define get_cycles get_cycles
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fe6407f0eb1b..3a5c8fb590e5 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -27,9 +27,6 @@ static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
#define pmd_free_tlb pmd_free_tlb
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 6448bb5be10c..a6e2cd89b609 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -25,9 +25,7 @@ static inline void __tlb_flush_idte(unsigned long asce)
if (MACHINE_HAS_TLB_GUEST)
opt |= IDTE_GUEST_ASCE;
/* Global TLB flush for the mm */
- asm volatile(
- " .insn rrf,0xb98e0000,0,%0,%1,0"
- : : "a" (opt), "a" (asce) : "cc");
+ asm volatile("idte 0,%1,%0" : : "a" (opt), "a" (asce) : "cc");
}
/*
diff --git a/arch/s390/include/asm/tpi.h b/arch/s390/include/asm/tpi.h
index 1ac538b8cbf5..f76e5fdff23a 100644
--- a/arch/s390/include/asm/tpi.h
+++ b/arch/s390/include/asm/tpi.h
@@ -19,6 +19,19 @@ struct tpi_info {
u32 :12;
} __packed __aligned(4);
+/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
+struct tpi_adapter_info {
+ u32 aism:8;
+ u32 :22;
+ u32 error:1;
+ u32 forward:1;
+ u32 reserved;
+ u32 adapter_IO:1;
+ u32 directed_irq:1;
+ u32 isc:3;
+ u32 :27;
+} __packed __aligned(4);
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_TPI_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index ce550d06abc3..f7038b800cc3 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -3,7 +3,7 @@
* S390 version
* Copyright IBM Corp. 1999, 2000
* Author(s): Hartmut Penner (hp@de.ibm.com),
- * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
*
* Derived from "include/asm-i386/uaccess.h"
*/
@@ -13,26 +13,15 @@
/*
* User space memory access functions
*/
+#include <asm/asm-extable.h>
#include <asm/processor.h>
#include <asm/ctl_reg.h>
#include <asm/extable.h>
#include <asm/facility.h>
+#include <asm-generic/access_ok.h>
void debug_user_asce(int exit);
-static inline int __range_ok(unsigned long addr, unsigned long size)
-{
- return 1;
-}
-
-#define __access_ok(addr, size) \
-({ \
- __chk_user_ptr(addr); \
- __range_ok((unsigned long)(addr), (size)); \
-})
-
-#define access_ok(addr, size) __access_ok(addr, size)
-
unsigned long __must_check
raw_copy_from_user(void *to, const void __user *from, unsigned long n);
@@ -44,30 +33,72 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
#define INLINE_COPY_TO_USER
#endif
-int __put_user_bad(void) __attribute__((noreturn));
-int __get_user_bad(void) __attribute__((noreturn));
-
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-
-#define __put_get_user_asm(to, from, size, insn) \
-({ \
- int __rc; \
- \
- asm volatile( \
- insn " 0,%[spec]\n" \
- "0: mvcos %[_to],%[_from],%[_size]\n" \
- "1: xr %[rc],%[rc]\n" \
- "2:\n" \
- ".pushsection .fixup, \"ax\"\n" \
- "3: lhi %[rc],%[retval]\n" \
- " jg 2b\n" \
- ".popsection\n" \
- EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
- : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
- : [_size] "d" (size), [_from] "Q" (*(from)), \
- [retval] "K" (-EFAULT), [spec] "K" (0x81UL) \
- : "cc", "0"); \
- __rc; \
+unsigned long __must_check
+_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key);
+
+static __always_inline unsigned long __must_check
+copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key)
+{
+ if (check_copy_size(to, n, false))
+ n = _copy_from_user_key(to, from, n, key);
+ return n;
+}
+
+unsigned long __must_check
+_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key);
+
+static __always_inline unsigned long __must_check
+copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key)
+{
+ if (check_copy_size(from, n, true))
+ n = _copy_to_user_key(to, from, n, key);
+ return n;
+}
+
+union oac {
+ unsigned int val;
+ struct {
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac1;
+ struct {
+ unsigned short key : 4;
+ unsigned short : 4;
+ unsigned short as : 2;
+ unsigned short : 4;
+ unsigned short k : 1;
+ unsigned short a : 1;
+ } oac2;
+ };
+};
+
+int __noreturn __put_user_bad(void);
+
+#define __put_user_asm(to, from, size) \
+({ \
+ union oac __oac_spec = { \
+ .oac1.as = PSW_BITS_AS_SECONDARY, \
+ .oac1.a = 1, \
+ }; \
+ int __rc; \
+ \
+ asm volatile( \
+ " lr 0,%[spec]\n" \
+ "0: mvcos %[_to],%[_from],%[_size]\n" \
+ "1: xr %[rc],%[rc]\n" \
+ "2:\n" \
+ EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
+ EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
+ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
+ : [_size] "d" (size), [_from] "Q" (*(from)), \
+ [spec] "d" (__oac_spec.val) \
+ : "cc", "0"); \
+ __rc; \
})
static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
@@ -76,24 +107,24 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
switch (size) {
case 1:
- rc = __put_get_user_asm((unsigned char __user *)ptr,
- (unsigned char *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned char __user *)ptr,
+ (unsigned char *)x,
+ size);
break;
case 2:
- rc = __put_get_user_asm((unsigned short __user *)ptr,
- (unsigned short *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned short __user *)ptr,
+ (unsigned short *)x,
+ size);
break;
case 4:
- rc = __put_get_user_asm((unsigned int __user *)ptr,
- (unsigned int *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned int __user *)ptr,
+ (unsigned int *)x,
+ size);
break;
case 8:
- rc = __put_get_user_asm((unsigned long __user *)ptr,
- (unsigned long *)x,
- size, "llilh");
+ rc = __put_user_asm((unsigned long __user *)ptr,
+ (unsigned long *)x,
+ size);
break;
default:
__put_user_bad();
@@ -102,30 +133,55 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
return rc;
}
+int __noreturn __get_user_bad(void);
+
+#define __get_user_asm(to, from, size) \
+({ \
+ union oac __oac_spec = { \
+ .oac2.as = PSW_BITS_AS_SECONDARY, \
+ .oac2.a = 1, \
+ }; \
+ int __rc; \
+ \
+ asm volatile( \
+ " lr 0,%[spec]\n" \
+ "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \
+ "1: xr %[rc],%[rc]\n" \
+ "2:\n" \
+ EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \
+ EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \
+ : [rc] "=&d" (__rc), "=Q" (*(to)) \
+ : [_size] "d" (size), [_from] "Q" (*(from)), \
+ [spec] "d" (__oac_spec.val), [_to] "a" (to), \
+ [_ksize] "K" (size) \
+ : "cc", "0"); \
+ __rc; \
+})
+
static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
int rc;
switch (size) {
case 1:
- rc = __put_get_user_asm((unsigned char *)x,
- (unsigned char __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned char *)x,
+ (unsigned char __user *)ptr,
+ size);
break;
case 2:
- rc = __put_get_user_asm((unsigned short *)x,
- (unsigned short __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned short *)x,
+ (unsigned short __user *)ptr,
+ size);
break;
case 4:
- rc = __put_get_user_asm((unsigned int *)x,
- (unsigned int __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned int *)x,
+ (unsigned int __user *)ptr,
+ size);
break;
case 8:
- rc = __put_get_user_asm((unsigned long *)x,
- (unsigned long __user *)ptr,
- size, "lghi");
+ rc = __get_user_asm((unsigned long *)x,
+ (unsigned long __user *)ptr,
+ size);
break;
default:
__get_user_bad();
@@ -134,97 +190,81 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
return rc;
}
-#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
-
-static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
-{
- size = raw_copy_to_user(ptr, x, size);
- return size ? -EFAULT : 0;
-}
-
-static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
-{
- size = raw_copy_from_user(x, ptr, size);
- return size ? -EFAULT : 0;
-}
-
-#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */
-
/*
* These are the main single-value transfer routines. They automatically
* use the right size if we just have the right pointer type.
*/
-#define __put_user(x, ptr) \
-({ \
- __typeof__(*(ptr)) __x = (x); \
- int __pu_err = -EFAULT; \
- __chk_user_ptr(ptr); \
- switch (sizeof (*(ptr))) { \
- case 1: \
- case 2: \
- case 4: \
- case 8: \
- __pu_err = __put_user_fn(&__x, ptr, \
- sizeof(*(ptr))); \
- break; \
- default: \
- __put_user_bad(); \
- break; \
- } \
- __builtin_expect(__pu_err, 0); \
+#define __put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __x = (x); \
+ int __pu_err = -EFAULT; \
+ \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ case 2: \
+ case 4: \
+ case 8: \
+ __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ break; \
+ default: \
+ __put_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__pu_err, 0); \
})
-#define put_user(x, ptr) \
-({ \
- might_fault(); \
- __put_user(x, ptr); \
+#define put_user(x, ptr) \
+({ \
+ might_fault(); \
+ __put_user(x, ptr); \
})
-
-#define __get_user(x, ptr) \
-({ \
- int __gu_err = -EFAULT; \
- __chk_user_ptr(ptr); \
- switch (sizeof(*(ptr))) { \
- case 1: { \
- unsigned char __x = 0; \
- __gu_err = __get_user_fn(&__x, ptr, \
- sizeof(*(ptr))); \
- (x) = *(__force __typeof__(*(ptr)) *) &__x; \
- break; \
- }; \
- case 2: { \
- unsigned short __x = 0; \
- __gu_err = __get_user_fn(&__x, ptr, \
- sizeof(*(ptr))); \
- (x) = *(__force __typeof__(*(ptr)) *) &__x; \
- break; \
- }; \
- case 4: { \
- unsigned int __x = 0; \
- __gu_err = __get_user_fn(&__x, ptr, \
- sizeof(*(ptr))); \
- (x) = *(__force __typeof__(*(ptr)) *) &__x; \
- break; \
- }; \
- case 8: { \
- unsigned long long __x = 0; \
- __gu_err = __get_user_fn(&__x, ptr, \
- sizeof(*(ptr))); \
- (x) = *(__force __typeof__(*(ptr)) *) &__x; \
- break; \
- }; \
- default: \
- __get_user_bad(); \
- break; \
- } \
- __builtin_expect(__gu_err, 0); \
+#define __get_user(x, ptr) \
+({ \
+ int __gu_err = -EFAULT; \
+ \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: { \
+ unsigned char __x; \
+ \
+ __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *)&__x; \
+ break; \
+ }; \
+ case 2: { \
+ unsigned short __x; \
+ \
+ __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *)&__x; \
+ break; \
+ }; \
+ case 4: { \
+ unsigned int __x; \
+ \
+ __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *)&__x; \
+ break; \
+ }; \
+ case 8: { \
+ unsigned long __x; \
+ \
+ __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \
+ (x) = *(__force __typeof__(*(ptr)) *)&__x; \
+ break; \
+ }; \
+ default: \
+ __get_user_bad(); \
+ break; \
+ } \
+ __builtin_expect(__gu_err, 0); \
})
-#define get_user(x, ptr) \
-({ \
- might_fault(); \
- __get_user(x, ptr); \
+#define get_user(x, ptr) \
+({ \
+ might_fault(); \
+ __get_user(x, ptr); \
})
/*
@@ -245,11 +285,8 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return __clear_user(to, n);
}
-int copy_to_user_real(void __user *dest, void *src, unsigned long count);
void *s390_kernel_write(void *dst, const void *src, size_t size);
-#define HAVE_GET_KERNEL_NOFAULT
-
int __noreturn __put_kernel_bad(void);
#define __put_kernel_asm(val, to, insn) \
@@ -257,23 +294,20 @@ int __noreturn __put_kernel_bad(void);
int __rc; \
\
asm volatile( \
- "0: " insn " %2,%1\n" \
- "1: xr %0,%0\n" \
+ "0: " insn " %[_val],%[_to]\n" \
+ "1: xr %[rc],%[rc]\n" \
"2:\n" \
- ".pushsection .fixup, \"ax\"\n" \
- "3: lhi %0,%3\n" \
- " jg 2b\n" \
- ".popsection\n" \
- EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
- : "=d" (__rc), "+Q" (*(to)) \
- : "d" (val), "K" (-EFAULT) \
+ EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
+ EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
+ : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \
+ : [_val] "d" (val) \
: "cc"); \
__rc; \
})
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
- u64 __x = (u64)(*((type *)(src))); \
+ unsigned long __x = (unsigned long)(*((type *)(src))); \
int __pk_err; \
\
switch (sizeof(type)) { \
@@ -304,16 +338,13 @@ int __noreturn __get_kernel_bad(void);
int __rc; \
\
asm volatile( \
- "0: " insn " %1,%2\n" \
- "1: xr %0,%0\n" \
+ "0: " insn " %[_val],%[_from]\n" \
+ "1: xr %[rc],%[rc]\n" \
"2:\n" \
- ".pushsection .fixup, \"ax\"\n" \
- "3: lhi %0,%3\n" \
- " jg 2b\n" \
- ".popsection\n" \
- EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
- : "=d" (__rc), "+d" (val) \
- : "Q" (*(from)), "K" (-EFAULT) \
+ EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \
+ EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \
+ : [rc] "=d" (__rc), [_val] "=d" (val) \
+ : [_from] "Q" (*(from)) \
: "cc"); \
__rc; \
})
@@ -324,28 +355,28 @@ do { \
\
switch (sizeof(type)) { \
case 1: { \
- u8 __x = 0; \
+ unsigned char __x; \
\
__gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
*((type *)(dst)) = (type)__x; \
break; \
}; \
case 2: { \
- u16 __x = 0; \
+ unsigned short __x; \
\
__gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
*((type *)(dst)) = (type)__x; \
break; \
}; \
case 4: { \
- u32 __x = 0; \
+ unsigned int __x; \
\
__gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
*((type *)(dst)) = (type)__x; \
break; \
}; \
case 8: { \
- u64 __x = 0; \
+ unsigned long __x; \
\
__gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
*((type *)(dst)) = (type)__x; \
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 9e9f75ef046a..4260bc5ce7f8 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -28,6 +28,7 @@
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
# ifdef CONFIG_COMPAT
+# define __ARCH_WANT_COMPAT_STAT
# define __ARCH_WANT_SYS_TIME32
# define __ARCH_WANT_SYS_UTIME32
# endif
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 5ebf534ef753..02462e7100c1 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -4,6 +4,8 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
+#include <linux/llist.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
@@ -36,10 +38,21 @@ struct unwind_state {
struct pt_regs *regs;
unsigned long sp, ip;
int graph_idx;
+ struct llist_node *kr_cur;
bool reliable;
bool error;
};
+/* Recover the return address modified by kretprobe and ftrace_graph. */
+static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+ unsigned long ip)
+{
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
+ if (is_kretprobe_trampoline(ip))
+ ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
+ return ip;
+}
+
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long first_frame);
bool unwind_next_frame(struct unwind_state *state);
diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h
index 0ca572ced21b..8e8aaf48582e 100644
--- a/arch/s390/include/asm/user.h
+++ b/arch/s390/include/asm/user.h
@@ -67,9 +67,5 @@ struct user {
unsigned long magic; /* To uniquely identify a core file */
char u_comm[32]; /* User command that was responsible */
};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
#endif /* _S390_USER_H */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 72d3e49c2860..be3ef9dd6972 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -2,7 +2,7 @@
/*
* Ultravisor Interfaces
*
- * Copyright IBM Corp. 2019
+ * Copyright IBM Corp. 2019, 2022
*
* Author(s):
* Vasily Gorbik <gor@linux.ibm.com>
@@ -50,8 +50,13 @@
#define UVC_CMD_SET_UNSHARE_ALL 0x0340
#define UVC_CMD_PIN_PAGE_SHARED 0x0341
#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
+#define UVC_CMD_DUMP_INIT 0x0400
+#define UVC_CMD_DUMP_CONF_STOR_STATE 0x0401
+#define UVC_CMD_DUMP_CPU 0x0402
+#define UVC_CMD_DUMP_COMPLETE 0x0403
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
+#define UVC_CMD_RETR_ATTEST 0x1020
/* Bits in installed uv calls */
enum uv_cmds_inst {
@@ -76,10 +81,16 @@ enum uv_cmds_inst {
BIT_UVC_CMD_UNSHARE_ALL = 20,
BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+ BIT_UVC_CMD_DUMP_INIT = 24,
+ BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
+ BIT_UVC_CMD_DUMP_CPU = 26,
+ BIT_UVC_CMD_DUMP_COMPLETE = 27,
+ BIT_UVC_CMD_RETR_ATTEST = 28,
};
enum uv_feat_ind {
BIT_UV_FEAT_MISC = 0,
+ BIT_UV_FEAT_AIV = 1,
};
struct uv_cb_header {
@@ -91,23 +102,32 @@ struct uv_cb_header {
/* Query Ultravisor Information */
struct uv_cb_qui {
- struct uv_cb_header header;
- u64 reserved08;
- u64 inst_calls_list[4];
- u64 reserved30[2];
- u64 uv_base_stor_len;
- u64 reserved48;
- u64 conf_base_phys_stor_len;
- u64 conf_base_virt_stor_len;
- u64 conf_virt_var_stor_len;
- u64 cpu_stor_len;
- u32 reserved70[3];
- u32 max_num_sec_conf;
- u64 max_guest_stor_addr;
- u8 reserved88[158 - 136];
- u16 max_guest_cpu_id;
- u64 uv_feature_indications;
- u8 reserveda0[200 - 168];
+ struct uv_cb_header header; /* 0x0000 */
+ u64 reserved08; /* 0x0008 */
+ u64 inst_calls_list[4]; /* 0x0010 */
+ u64 reserved30[2]; /* 0x0030 */
+ u64 uv_base_stor_len; /* 0x0040 */
+ u64 reserved48; /* 0x0048 */
+ u64 conf_base_phys_stor_len; /* 0x0050 */
+ u64 conf_base_virt_stor_len; /* 0x0058 */
+ u64 conf_virt_var_stor_len; /* 0x0060 */
+ u64 cpu_stor_len; /* 0x0068 */
+ u32 reserved70[3]; /* 0x0070 */
+ u32 max_num_sec_conf; /* 0x007c */
+ u64 max_guest_stor_addr; /* 0x0080 */
+ u8 reserved88[158 - 136]; /* 0x0088 */
+ u16 max_guest_cpu_id; /* 0x009e */
+ u64 uv_feature_indications; /* 0x00a0 */
+ u64 reserveda8; /* 0x00a8 */
+ u64 supp_se_hdr_versions; /* 0x00b0 */
+ u64 supp_se_hdr_pcf; /* 0x00b8 */
+ u64 reservedc0; /* 0x00c0 */
+ u64 conf_dump_storage_state_len; /* 0x00c8 */
+ u64 conf_dump_finalize_len; /* 0x00d0 */
+ u64 reservedd8; /* 0x00d8 */
+ u64 supp_att_req_hdr_ver; /* 0x00e0 */
+ u64 supp_att_pflags; /* 0x00e8 */
+ u8 reservedf0[256 - 240]; /* 0x00f0 */
} __packed __aligned(8);
/* Initialize Ultravisor */
@@ -218,6 +238,50 @@ struct uv_cb_share {
u64 reserved28;
} __packed __aligned(8);
+/* Retrieve Attestation Measurement */
+struct uv_cb_attest {
+ struct uv_cb_header header; /* 0x0000 */
+ u64 reserved08[2]; /* 0x0008 */
+ u64 arcb_addr; /* 0x0018 */
+ u64 cont_token; /* 0x0020 */
+ u8 reserved28[6]; /* 0x0028 */
+ u16 user_data_len; /* 0x002e */
+ u8 user_data[256]; /* 0x0030 */
+ u32 reserved130[3]; /* 0x0130 */
+ u32 meas_len; /* 0x013c */
+ u64 meas_addr; /* 0x0140 */
+ u8 config_uid[16]; /* 0x0148 */
+ u32 reserved158; /* 0x0158 */
+ u32 add_data_len; /* 0x015c */
+ u64 add_data_addr; /* 0x0160 */
+ u64 reserved168[4]; /* 0x0168 */
+} __packed __aligned(8);
+
+struct uv_cb_dump_cpu {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u64 dump_area_origin;
+ u64 reserved28[5];
+} __packed __aligned(8);
+
+struct uv_cb_dump_stor_state {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 config_handle;
+ u64 dump_area_origin;
+ u64 gaddr;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+struct uv_cb_dump_complete {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 config_handle;
+ u64 dump_area_origin;
+ u64 reserved30[5];
+} __packed __aligned(8);
+
static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
@@ -285,6 +349,12 @@ struct uv_info {
unsigned int max_num_sec_conf;
unsigned short max_guest_cpu_id;
unsigned long uv_feature_indications;
+ unsigned long supp_se_hdr_ver;
+ unsigned long supp_se_hdr_pcf;
+ unsigned long conf_dump_storage_state_len;
+ unsigned long conf_dump_finalize_len;
+ unsigned long supp_att_req_hdr_ver;
+ unsigned long supp_att_pflags;
};
extern struct uv_info uv_info;
@@ -356,6 +426,7 @@ static inline int is_prot_virt_host(void)
}
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_owned_page(unsigned long paddr);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_owned_from_secure(unsigned long paddr);
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index 0c05a673811c..95480ed9149e 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -366,17 +366,27 @@
.macro VLM vfrom, vto, disp, base, hint=3
VX_NUM v1, \vfrom
VX_NUM v3, \vto
- GR_NUM b2, \base /* Base register */
+ GR_NUM b2, \base
.word 0xE700 | ((v1&15) << 4) | (v3&15)
.word (b2 << 12) | (\disp)
MRXBOPC \hint, 0x36, v1, v3
.endm
+/* VECTOR STORE */
+.macro VST vr1, disp, index="%r0", base
+ VX_NUM v1, \vr1
+ GR_NUM x2, \index
+ GR_NUM b2, \base
+ .word 0xE700 | ((v1&15) << 4) | (x2&15)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC 0, 0x0E, v1
+.endm
+
/* VECTOR STORE MULTIPLE */
.macro VSTM vfrom, vto, disp, base, hint=3
VX_NUM v1, \vfrom
VX_NUM v3, \vto
- GR_NUM b2, \base /* Base register */
+ GR_NUM b2, \base
.word 0xE700 | ((v1&15) << 4) | (v3&15)
.word (b2 << 12) | (\disp)
MRXBOPC \hint, 0x3E, v1, v3
@@ -411,6 +421,81 @@
VUPLL \vr1, \vr2, 2
.endm
+/* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */
+.macro VPDI vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC \m4, 0x84, v1, v2, v3
+.endm
+
+/* VECTOR REPLICATE */
+.macro VREP vr1, vr3, imm2, m4
+ VX_NUM v1, \vr1
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v3&15)
+ .word \imm2
+ MRXBOPC \m4, 0x4D, v1, v3
+.endm
+.macro VREPB vr1, vr3, imm2
+ VREP \vr1, \vr3, \imm2, 0
+.endm
+.macro VREPH vr1, vr3, imm2
+ VREP \vr1, \vr3, \imm2, 1
+.endm
+.macro VREPF vr1, vr3, imm2
+ VREP \vr1, \vr3, \imm2, 2
+.endm
+.macro VREPG vr1, vr3, imm2
+ VREP \vr1, \vr3, \imm2, 3
+.endm
+
+/* VECTOR MERGE HIGH */
+.macro VMRH vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC \m4, 0x61, v1, v2, v3
+.endm
+.macro VMRHB vr1, vr2, vr3
+ VMRH \vr1, \vr2, \vr3, 0
+.endm
+.macro VMRHH vr1, vr2, vr3
+ VMRH \vr1, \vr2, \vr3, 1
+.endm
+.macro VMRHF vr1, vr2, vr3
+ VMRH \vr1, \vr2, \vr3, 2
+.endm
+.macro VMRHG vr1, vr2, vr3
+ VMRH \vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR MERGE LOW */
+.macro VMRL vr1, vr2, vr3, m4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12)
+ MRXBOPC \m4, 0x60, v1, v2, v3
+.endm
+.macro VMRLB vr1, vr2, vr3
+ VMRL \vr1, \vr2, \vr3, 0
+.endm
+.macro VMRLH vr1, vr2, vr3
+ VMRL \vr1, \vr2, \vr3, 1
+.endm
+.macro VMRLF vr1, vr2, vr3
+ VMRL \vr1, \vr2, \vr3, 2
+.endm
+.macro VMRLG vr1, vr2, vr3
+ VMRL \vr1, \vr2, \vr3, 3
+.endm
+
/* Vector integer instructions */
@@ -557,5 +642,37 @@
VESRAV \vr1, \vr2, \vr3, 3
.endm
+/* VECTOR ELEMENT ROTATE LEFT LOGICAL */
+.macro VERLL vr1, vr3, disp, base="%r0", m4
+ VX_NUM v1, \vr1
+ VX_NUM v3, \vr3
+ GR_NUM b2, \base
+ .word 0xE700 | ((v1&15) << 4) | (v3&15)
+ .word (b2 << 12) | (\disp)
+ MRXBOPC \m4, 0x33, v1, v3
+.endm
+.macro VERLLB vr1, vr3, disp, base="%r0"
+ VERLL \vr1, \vr3, \disp, \base, 0
+.endm
+.macro VERLLH vr1, vr3, disp, base="%r0"
+ VERLL \vr1, \vr3, \disp, \base, 1
+.endm
+.macro VERLLF vr1, vr3, disp, base="%r0"
+ VERLL \vr1, \vr3, \disp, \base, 2
+.endm
+.macro VERLLG vr1, vr3, disp, base="%r0"
+ VERLL \vr1, \vr3, \disp, \base, 3
+.endm
+
+/* VECTOR SHIFT LEFT DOUBLE BY BYTE */
+.macro VSLDB vr1, vr2, vr3, imm4
+ VX_NUM v1, \vr1
+ VX_NUM v2, \vr2
+ VX_NUM v3, \vr3
+ .word 0xE700 | ((v1&15) << 4) | (v2&15)
+ .word ((v3&15) << 12) | (\imm4)
+ MRXBOPC 0, 0x77, v1, v2, v3
+.endm
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 9ec86fae9980..93d1ccd3304c 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -183,6 +183,18 @@ typedef struct format_data_t {
} format_data_t;
/*
+ * struct dasd_copypair_swap_data_t
+ * represents all data necessary to issue a swap of the copy pair relation
+ */
+struct dasd_copypair_swap_data_t {
+ char primary[20]; /* BUSID of primary */
+ char secondary[20]; /* BUSID of secondary */
+
+ /* Reserved for future updates. */
+ __u8 reserved[64];
+};
+
+/*
* values to be used for format_data_t.intensity
* 0/8: normal format
* 1/9: also write record zero
@@ -326,6 +338,8 @@ struct dasd_snid_ioctl_data {
#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t)
/* Release Allocated Space */
#define BIODASDRAS _IOW(DASD_IOCTL_LETTER, 3, format_data_t)
+/* Swap copy pair relation */
+#define BIODASDCOPYPAIRSWAP _IOW(DASD_IOCTL_LETTER, 4, struct dasd_copypair_swap_data_t)
/* Get Sense Path Group ID (SNID) data */
#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
diff --git a/arch/s390/include/uapi/asm/hwctrset.h b/arch/s390/include/uapi/asm/hwctrset.h
index 3d8284b95f87..e56b9dd23a4b 100644
--- a/arch/s390/include/uapi/asm/hwctrset.h
+++ b/arch/s390/include/uapi/asm/hwctrset.h
@@ -30,18 +30,18 @@ struct s390_ctrset_start { /* Set CPUs to operate on */
struct s390_ctrset_setdata { /* Counter set data */
__u32 set; /* Counter set number */
__u32 no_cnts; /* # of counters stored in cv[] */
- __u64 cv[0]; /* Counter values (variable length) */
+ __u64 cv[]; /* Counter values (variable length) */
};
struct s390_ctrset_cpudata { /* Counter set data per CPU */
__u32 cpu_nr; /* CPU number */
__u32 no_sets; /* # of counters sets in data[] */
- struct s390_ctrset_setdata data[0];
+ struct s390_ctrset_setdata data[];
};
struct s390_ctrset_read { /* Structure to get all ctr sets */
__u64 no_cpus; /* Total # of CPUs data taken from */
- struct s390_ctrset_cpudata data[0];
+ struct s390_ctrset_cpudata data[];
};
#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 7a6b14874d65..a73cf01a1606 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_CRYPTO 2
#define KVM_S390_VM_CPU_MODEL 3
#define KVM_S390_VM_MIGRATION 4
+#define KVM_S390_VM_CPU_TOPOLOGY 5
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 7349e96d28a0..924b876f992c 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -171,7 +171,7 @@ struct pkey_skey2pkey {
#define PKEY_SKEY2PKEY _IOWR(PKEY_IOCTL_MAGIC, 0x06, struct pkey_skey2pkey)
/*
- * Verify the given CCA AES secure key for being able to be useable with
+ * Verify the given CCA AES secure key for being able to be usable with
* the pkey module. Check for correct key type and check for having at
* least one crypto card being able to handle this key (master key
* or old master key verification pattern matches).
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
index 0189f326aac5..e74d6ba1bd3b 100644
--- a/arch/s390/include/uapi/asm/signal.h
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -108,7 +108,7 @@ struct sigaction {
typedef struct sigaltstack {
void __user *ss_sp;
int ss_flags;
- size_t ss_size;
+ __kernel_size_t ss_size;
} stack_t;
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
deleted file mode 100644
index 54223169c806..000000000000
--- a/arch/s390/include/uapi/asm/termios.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- *
- * Derived from "include/asm-i386/termios.h"
- */
-
-#ifndef _UAPI_S390_TERMIOS_H
-#define _UAPI_S390_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
- unsigned short c_iflag; /* input mode flags */
- unsigned short c_oflag; /* output mode flags */
- unsigned short c_cflag; /* control mode flags */
- unsigned short c_lflag; /* local mode flags */
- unsigned char c_line; /* line discipline */
- unsigned char c_cc[NCC]; /* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE 0x001
-#define TIOCM_DTR 0x002
-#define TIOCM_RTS 0x004
-#define TIOCM_ST 0x008
-#define TIOCM_SR 0x010
-#define TIOCM_CTS 0x020
-#define TIOCM_CAR 0x040
-#define TIOCM_RNG 0x080
-#define TIOCM_DSR 0x100
-#define TIOCM_CD TIOCM_CAR
-#define TIOCM_RI TIOCM_RNG
-#define TIOCM_OUT1 0x2000
-#define TIOCM_OUT2 0x4000
-#define TIOCM_LOOP 0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-
-#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/include/uapi/asm/uvdevice.h b/arch/s390/include/uapi/asm/uvdevice.h
new file mode 100644
index 000000000000..10a5ac918e02
--- /dev/null
+++ b/arch/s390/include/uapi/asm/uvdevice.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2022
+ * Author(s): Steffen Eiden <seiden@linux.ibm.com>
+ */
+#ifndef __S390_ASM_UVDEVICE_H
+#define __S390_ASM_UVDEVICE_H
+
+#include <linux/types.h>
+
+struct uvio_ioctl_cb {
+ __u32 flags;
+ __u16 uv_rc; /* UV header rc value */
+ __u16 uv_rrc; /* UV header rrc value */
+ __u64 argument_addr; /* Userspace address of uvio argument */
+ __u32 argument_len;
+ __u8 reserved14[0x40 - 0x14]; /* must be zero */
+};
+
+#define UVIO_ATT_USER_DATA_LEN 0x100
+#define UVIO_ATT_UID_LEN 0x10
+struct uvio_attest {
+ __u64 arcb_addr; /* 0x0000 */
+ __u64 meas_addr; /* 0x0008 */
+ __u64 add_data_addr; /* 0x0010 */
+ __u8 user_data[UVIO_ATT_USER_DATA_LEN]; /* 0x0018 */
+ __u8 config_uid[UVIO_ATT_UID_LEN]; /* 0x0118 */
+ __u32 arcb_len; /* 0x0128 */
+ __u32 meas_len; /* 0x012c */
+ __u32 add_data_len; /* 0x0130 */
+ __u16 user_data_len; /* 0x0134 */
+ __u16 reserved136; /* 0x0136 */
+};
+
+/*
+ * The following max values define an upper length for the IOCTL in/out buffers.
+ * However, they do not represent the maximum the Ultravisor allows which is
+ * often way smaller. By allowing larger buffer sizes we hopefully do not need
+ * to update the code with every machine update. It is therefore possible for
+ * userspace to request more memory than actually used by kernel/UV.
+ */
+#define UVIO_ATT_ARCB_MAX_LEN 0x100000
+#define UVIO_ATT_MEASUREMENT_MAX_LEN 0x8000
+#define UVIO_ATT_ADDITIONAL_MAX_LEN 0x8000
+
+#define UVIO_DEVICE_NAME "uv"
+#define UVIO_TYPE_UVC 'u'
+
+#define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb)
+
+#endif /* __S390_ASM_UVDEVICE_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index 22fd202856bc..d83713f67530 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -4,7 +4,7 @@
*
* zcrypt 2.2.1 (user-visible header)
*
- * Copyright IBM Corp. 2001, 2019
+ * Copyright IBM Corp. 2001, 2022
* Author(s): Robert Burroughs
* Eric Rossman (edrossma@us.ibm.com)
*
@@ -85,7 +85,7 @@ struct ica_rsa_modexpo_crt {
struct CPRBX {
__u16 cprb_len; /* CPRB length 220 */
__u8 cprb_ver_id; /* CPRB version id. 0x02 */
- __u8 pad_000[3]; /* Alignment pad bytes */
+ __u8 _pad_000[3]; /* Alignment pad bytes */
__u8 func_id[2]; /* function id 0x5432 */
__u8 cprb_flags[4]; /* Flags */
__u32 req_parml; /* request parameter buffer len */
@@ -95,19 +95,19 @@ struct CPRBX {
__u32 rpl_datal; /* reply data block len */
__u32 rpld_datal; /* replied data block len */
__u32 req_extbl; /* request extension block len */
- __u8 pad_001[4]; /* reserved */
+ __u8 _pad_001[4]; /* reserved */
__u32 rpld_extbl; /* replied extension block len */
- __u8 padx000[16 - sizeof(__u8 *)];
+ __u8 _pad_002[16 - sizeof(__u8 *)];
__u8 __user *req_parmb; /* request parm block 'address' */
- __u8 padx001[16 - sizeof(__u8 *)];
+ __u8 _pad_003[16 - sizeof(__u8 *)];
__u8 __user *req_datab; /* request data block 'address' */
- __u8 padx002[16 - sizeof(__u8 *)];
+ __u8 _pad_004[16 - sizeof(__u8 *)];
__u8 __user *rpl_parmb; /* reply parm block 'address' */
- __u8 padx003[16 - sizeof(__u8 *)];
+ __u8 _pad_005[16 - sizeof(__u8 *)];
__u8 __user *rpl_datab; /* reply data block 'address' */
- __u8 padx004[16 - sizeof(__u8 *)];
+ __u8 _pad_006[16 - sizeof(__u8 *)];
__u8 __user *req_extb; /* request extension block 'addr'*/
- __u8 padx005[16 - sizeof(__u8 *)];
+ __u8 _pad_007[16 - sizeof(__u8 *)];
__u8 __user *rpl_extb; /* reply extension block 'address'*/
__u16 ccp_rtcode; /* server return code */
__u16 ccp_rscode; /* server reason code */
@@ -115,12 +115,10 @@ struct CPRBX {
__u8 logon_id[8]; /* Logon Identifier */
__u8 mac_value[8]; /* Mac Value */
__u8 mac_content_flgs; /* Mac content flag byte */
- __u8 pad_002; /* Alignment */
+ __u8 _pad_008; /* Alignment */
__u16 domain; /* Domain */
- __u8 usage_domain[4]; /* Usage domain */
- __u8 cntrl_domain[4]; /* Control domain */
- __u8 S390enf_mask[4]; /* S/390 enforcement mask */
- __u8 pad_004[36]; /* reserved */
+ __u8 _pad_009[12]; /* reserved, checked for zeros */
+ __u8 _pad_010[36]; /* reserved */
} __attribute__((packed));
/**
@@ -238,8 +236,8 @@ struct zcrypt_device_matrix_ext {
};
#define AUTOSELECT 0xFFFFFFFF
-#define AUTOSEL_AP ((__u16) 0xFFFF)
-#define AUTOSEL_DOM ((__u16) 0xFFFF)
+#define AUTOSEL_AP ((__u16)0xFFFF)
+#define AUTOSEL_DOM ((__u16)0xFFFF)
#define ZCRYPT_IOCTL_MAGIC 'z'
@@ -288,7 +286,7 @@ struct zcrypt_device_matrix_ext {
* 0x08: CEX3A
* 0x0a: CEX4
* 0x0b: CEX5
- * 0x0c: CEX6 and CEX7
+ * 0x0c: CEX6, CEX7 or CEX8
* 0x0d: device is disabled
*
* ZCRYPT_QDEPTH_MASK
@@ -305,12 +303,12 @@ struct zcrypt_device_matrix_ext {
/**
* Supported ioctl calls
*/
-#define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
-#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
-#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
-#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+#define ICARSAMODEXPO _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
-#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
#define ZCRYPT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
#define ZCRYPT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
@@ -352,7 +350,7 @@ struct zcrypt_device_matrix {
};
/* Deprecated: use ZCRYPT_DEVICE_STATUS */
-#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+#define ZDEVICESTATUS _IOC(_IOC_READ | _IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
/* Deprecated: use ZCRYPT_STATUS_MASK */
#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
/* Deprecated: use ZCRYPT_QDEPTH_MASK */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 80f500ffb55c..5e6a23299790 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,16 +33,16 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
+obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
-extra-y += head64.o vmlinux.lds
+extra-y += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
@@ -55,10 +55,11 @@ compat-obj-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
obj-$(CONFIG_COMPAT) += $(compat-obj-y)
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
-obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
+obj-$(CONFIG_KPROBES) += mcount.o
+obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
@@ -71,6 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..fb92e8ed0525
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+
+#define ABS_LOWCORE_UNMAPPED 1
+#define ABS_LOWCORE_LAP_ON 2
+#define ABS_LOWCORE_IRQS_ON 4
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+bool __ro_after_init abs_lowcore_mapped;
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ unsigned long phys = __pa(lc);
+ int rc, i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+ if (rc) {
+ /*
+ * Do not unmap allocated page tables in case the
+ * allocation was not requested. In such a case the
+ * request is expected coming from an atomic context,
+ * while the unmap attempt might sleep.
+ */
+ if (alloc) {
+ for (--i; i >= 0; i--) {
+ addr -= PAGE_SIZE;
+ vmem_unmap_4k_page(addr);
+ }
+ }
+ return rc;
+ }
+ addr += PAGE_SIZE;
+ phys += PAGE_SIZE;
+ }
+ return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ int i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ vmem_unmap_4k_page(addr);
+ addr += PAGE_SIZE;
+ }
+}
+
+struct lowcore *get_abs_lowcore(unsigned long *flags)
+{
+ unsigned long irq_flags;
+ union ctlreg0 cr0;
+ int cpu;
+
+ *flags = 0;
+ cpu = get_cpu();
+ if (abs_lowcore_mapped) {
+ return ((struct lowcore *)__abs_lowcore) + cpu;
+ } else {
+ if (cpu != 0)
+ panic("Invalid unmapped absolute lowcore access\n");
+ local_irq_save(irq_flags);
+ if (!irqs_disabled_flags(irq_flags))
+ *flags |= ABS_LOWCORE_IRQS_ON;
+ __ctl_store(cr0.val, 0, 0);
+ if (cr0.lap) {
+ *flags |= ABS_LOWCORE_LAP_ON;
+ __ctl_clear_bit(0, 28);
+ }
+ *flags |= ABS_LOWCORE_UNMAPPED;
+ return lowcore_ptr[0];
+ }
+}
+
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
+{
+ if (abs_lowcore_mapped) {
+ if (flags)
+ panic("Invalid mapped absolute lowcore release\n");
+ } else {
+ if (smp_processor_id() != 0)
+ panic("Invalid mapped absolute lowcore access\n");
+ if (!(flags & ABS_LOWCORE_UNMAPPED))
+ panic("Invalid unmapped absolute lowcore release\n");
+ if (flags & ABS_LOWCORE_LAP_ON)
+ __ctl_set_bit(0, 28);
+ if (flags & ABS_LOWCORE_IRQS_ON)
+ local_irq_enable();
+ }
+ put_cpu();
+}
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index cce0ddee2d02..e7bca29f9c34 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -7,8 +7,6 @@
#include <asm/facility.h>
#include <asm/nospec-branch.h>
-#define MAX_PATCH_LEN (255 - 1)
-
static int __initdata_or_module alt_instr_disabled;
static int __init disable_alternative_instructions(char *str)
@@ -19,85 +17,30 @@ static int __init disable_alternative_instructions(char *str)
early_param("noaltinstr", disable_alternative_instructions);
-struct brcl_insn {
- u16 opc;
- s32 disp;
-} __packed;
-
-static u16 __initdata_or_module nop16 = 0x0700;
-static u32 __initdata_or_module nop32 = 0x47000000;
-static struct brcl_insn __initdata_or_module nop48 = {
- 0xc004, 0
-};
-
-static const void *nops[] __initdata_or_module = {
- &nop16,
- &nop32,
- &nop48
-};
-
-static void __init_or_module add_jump_padding(void *insns, unsigned int len)
-{
- struct brcl_insn brcl = {
- 0xc0f4,
- len / 2
- };
-
- memcpy(insns, &brcl, sizeof(brcl));
- insns += sizeof(brcl);
- len -= sizeof(brcl);
-
- while (len > 0) {
- memcpy(insns, &nop16, 2);
- insns += 2;
- len -= 2;
- }
-}
-
-static void __init_or_module add_padding(void *insns, unsigned int len)
-{
- if (len > 6)
- add_jump_padding(insns, len);
- else if (len >= 2)
- memcpy(insns, nops[len / 2 - 1], len);
-}
-
static void __init_or_module __apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
struct alt_instr *a;
u8 *instr, *replacement;
- u8 insnbuf[MAX_PATCH_LEN];
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
for (a = start; a < end; a++) {
- int insnbuf_sz = 0;
-
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
if (!__test_facility(a->facility, alt_stfle_fac_list))
continue;
- if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
+ if (unlikely(a->instrlen % 2)) {
WARN_ONCE(1, "cpu alternatives instructions length is "
"odd, skipping patching\n");
continue;
}
- memcpy(insnbuf, replacement, a->replacementlen);
- insnbuf_sz = a->replacementlen;
-
- if (a->instrlen > a->replacementlen) {
- add_padding(insnbuf + a->replacementlen,
- a->instrlen - a->replacementlen);
- insnbuf_sz += a->instrlen - a->replacementlen;
- }
-
- s390_kernel_write(instr, insnbuf, insnbuf_sz);
+ s390_kernel_write(instr, replacement, a->instrlen);
}
}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 8e00bb228662..d8ce965c0a97 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -32,6 +32,22 @@ int main(void)
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
OFFSET(__PT_GPRS, pt_regs, gprs);
+ OFFSET(__PT_R0, pt_regs, gprs[0]);
+ OFFSET(__PT_R1, pt_regs, gprs[1]);
+ OFFSET(__PT_R2, pt_regs, gprs[2]);
+ OFFSET(__PT_R3, pt_regs, gprs[3]);
+ OFFSET(__PT_R4, pt_regs, gprs[4]);
+ OFFSET(__PT_R5, pt_regs, gprs[5]);
+ OFFSET(__PT_R6, pt_regs, gprs[6]);
+ OFFSET(__PT_R7, pt_regs, gprs[7]);
+ OFFSET(__PT_R8, pt_regs, gprs[8]);
+ OFFSET(__PT_R9, pt_regs, gprs[9]);
+ OFFSET(__PT_R10, pt_regs, gprs[10]);
+ OFFSET(__PT_R11, pt_regs, gprs[11]);
+ OFFSET(__PT_R12, pt_regs, gprs[12]);
+ OFFSET(__PT_R13, pt_regs, gprs[13]);
+ OFFSET(__PT_R14, pt_regs, gprs[14]);
+ OFFSET(__PT_R15, pt_regs, gprs[15]);
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
OFFSET(__PT_FLAGS, pt_regs, flags);
OFFSET(__PT_CR1, pt_regs, cr1);
@@ -41,18 +57,16 @@ int main(void)
/* stack_frame offsets */
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
OFFSET(__SF_GPRS, stack_frame, gprs);
- OFFSET(__SF_EMPTY, stack_frame, empty1[0]);
- OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]);
- OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]);
- OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]);
- OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);
+ OFFSET(__SF_EMPTY, stack_frame, empty[0]);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
+ OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
+ OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
/* idle data offsets */
OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
- OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
- OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
BLANK();
/* hardware defined lowcore locations 0x000 - 0x1ff */
@@ -123,14 +137,12 @@ int main(void)
OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_LPP, lowcore, lpp);
OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
- OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
- OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
- OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
OFFSET(__LC_GMAP, lowcore, gmap);
- OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
OFFSET(__LC_LAST_BREAK, lowcore, last_break);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
+ OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info);
+ OFFSET(__LC_OS_INFO, lowcore, os_info);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
OFFSET(__LC_MCESAD, lowcore, mcesad);
OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
deleted file mode 100644
index d255c69c1779..000000000000
--- a/arch/s390/kernel/base.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/s390/kernel/base.S
- *
- * Copyright IBM Corp. 2006, 2007
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- * Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/nospec-insn.h>
-#include <asm/ptrace.h>
-
- GEN_BR_THUNK %r9
- GEN_BR_THUNK %r14
-
-ENTRY(s390_base_pgm_handler)
- stmg %r0,%r15,__LC_SAVE_AREA_SYNC
- basr %r13,0
-0: aghi %r15,-STACK_FRAME_OVERHEAD
- larl %r1,s390_base_pgm_handler_fn
- lg %r9,0(%r1)
- ltgr %r9,%r9
- jz 1f
- BASR_EX %r14,%r9
- lmg %r0,%r15,__LC_SAVE_AREA_SYNC
- lpswe __LC_PGM_OLD_PSW
-1: lpswe disabled_wait_psw-0b(%r13)
-ENDPROC(s390_base_pgm_handler)
-
- .align 8
-disabled_wait_psw:
- .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler
-
- .section .bss
- .align 8
- .globl s390_base_pgm_handler_fn
-s390_base_pgm_handler_fn:
- .quad 0
- .previous
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index d66825e53fce..7ee3651d00ab 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -3,7 +3,6 @@
* Extract CPU cache information and expose them via sysfs.
*
* Copyright IBM Corp. 2012
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#include <linux/seq_file.h>
@@ -71,8 +70,6 @@ void show_cacheinfo(struct seq_file *m)
struct cacheinfo *cache;
int idx;
- if (!test_facility(34))
- return;
this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
cache = this_cpu_ci->info_list + idx;
@@ -132,8 +129,6 @@ int init_cache_level(unsigned int cpu)
union cache_topology ct;
enum cache_type ctype;
- if (!test_facility(34))
- return -EOPNOTSUPP;
if (!this_cpu_ci)
return -EINVAL;
ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
@@ -157,8 +152,6 @@ int populate_cache_leaves(unsigned int cpu)
union cache_topology ct;
enum cache_type ctype;
- if (!test_facility(34))
- return -EOPNOTSUPP;
ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
idx < this_cpu_ci->num_leaves; idx++, level++) {
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 64509e7dbd3b..ef23739b277c 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -5,69 +5,59 @@
#include <linux/compat.h>
#include <linux/socket.h>
#include <linux/syscalls.h>
+#include <asm/ptrace.h>
-/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
-/* to a 64 bit pointer */
-#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
-#define AA(__x) \
- ((unsigned long)(__x))
+/*
+ * Macro that masks the high order bit of a 32 bit pointer and
+ * converts it to a 64 bit pointer.
+ */
+#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x) ((unsigned long)(__x))
/* Now 32bit compatibility types */
struct ipc_kludge_32 {
- __u32 msgp; /* pointer */
- __s32 msgtyp;
+ __u32 msgp; /* pointer */
+ __s32 msgtyp;
};
/* asm/sigcontext.h */
-typedef union
-{
- __u64 d;
- __u32 f;
+typedef union {
+ __u64 d;
+ __u32 f;
} freg_t32;
-typedef struct
-{
+typedef struct {
unsigned int fpc;
unsigned int pad;
- freg_t32 fprs[__NUM_FPRS];
+ freg_t32 fprs[__NUM_FPRS];
} _s390_fp_regs32;
-typedef struct
-{
- __u32 mask;
- __u32 addr;
-} _psw_t32 __attribute__ ((aligned(8)));
-
-typedef struct
-{
- _psw_t32 psw;
+typedef struct {
+ psw_t32 psw;
__u32 gprs[__NUM_GPRS];
__u32 acrs[__NUM_ACRS];
} _s390_regs_common32;
-typedef struct
-{
+typedef struct {
_s390_regs_common32 regs;
- _s390_fp_regs32 fpregs;
+ _s390_fp_regs32 fpregs;
} _sigregs32;
-typedef struct
-{
- __u32 gprs_high[__NUM_GPRS];
- __u64 vxrs_low[__NUM_VXRS_LOW];
- __vector128 vxrs_high[__NUM_VXRS_HIGH];
- __u8 __reserved[128];
+typedef struct {
+ __u32 gprs_high[__NUM_GPRS];
+ __u64 vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ __u8 __reserved[128];
} _sigregs_ext32;
#define _SIGCONTEXT_NSIG32 64
#define _SIGCONTEXT_NSIG_BPW32 32
#define __SIGNAL_FRAMESIZE32 96
-#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2)
+#define _SIGMASK_COPY_SIZE32 (sizeof(u32) * 2)
-struct sigcontext32
-{
+struct sigcontext32 {
__u32 oldmask[_COMPAT_NSIG_WORDS];
- __u32 sregs; /* pointer */
+ __u32 sregs; /* pointer */
};
/* asm/signal.h */
@@ -75,11 +65,11 @@ struct sigcontext32
/* asm/ucontext.h */
struct ucontext32 {
__u32 uc_flags;
- __u32 uc_link; /* pointer */
+ __u32 uc_link; /* pointer */
compat_stack_t uc_stack;
_sigregs32 uc_mcontext;
compat_sigset_t uc_sigmask;
- /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
unsigned char __unused[128 - sizeof(compat_sigset_t)];
_sigregs_ext32 uc_mcontext_ext;
};
@@ -88,25 +78,6 @@ struct stat64_emu31;
struct mmap_arg_struct_emu31;
struct fadvise64_64_args;
-long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
-long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
-long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
-long compat_sys_s390_setregid16(u16 rgid, u16 egid);
-long compat_sys_s390_setgid16(u16 gid);
-long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
-long compat_sys_s390_setuid16(u16 uid);
-long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
-long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
-long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
-long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
-long compat_sys_s390_setfsuid16(u16 uid);
-long compat_sys_s390_setfsgid16(u16 gid);
-long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
-long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
-long compat_sys_s390_getuid16(void);
-long compat_sys_s390_geteuid16(void);
-long compat_sys_s390_getgid16(void);
-long compat_sys_s390_getegid16(void);
long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
@@ -118,8 +89,8 @@ long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbu
long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
-long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
-long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count);
long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index cca142fbb516..eee1ad3e1b29 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -89,7 +89,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
_sigregs32 user_sregs;
int i;
- /* Alwys make any pending restarted system call return -EINTR */
+ /* Always make any pending restarted system call return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 000000000000..1b2ae42a0c15
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <asm/elf.h>
+
+enum {
+ TYPE_HWCAP,
+ TYPE_FACILITY,
+};
+
+struct s390_cpu_feature {
+ unsigned int type : 4;
+ unsigned int num : 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+ [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+ [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+ [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+};
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ struct s390_cpu_feature *feature;
+
+ if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+ return 0;
+ feature = &s390_cpu_features[num];
+ switch (feature->type) {
+ case TYPE_HWCAP:
+ return !!(elf_hwcap & BIT(feature->num));
+ case TYPE_FACILITY:
+ return test_facility(feature->num);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(cpu_have_feature);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 785d54c9350c..dd74fe664ed1 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -15,11 +15,13 @@
#include <linux/slab.h>
#include <linux/memblock.h>
#include <linux/elf.h>
+#include <linux/uio.h>
#include <asm/asm-offsets.h>
#include <asm/os_info.h>
#include <asm/elf.h>
#include <asm/ipl.h>
#include <asm/sclp.h>
+#include <asm/maccess.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -60,9 +62,9 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
{
struct save_area *sa;
- sa = (void *) memblock_phys_alloc(sizeof(*sa), 8);
+ sa = memblock_alloc(sizeof(*sa), 8);
if (!sa)
- panic("Failed to allocate save area\n");
+ return NULL;
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -113,121 +115,60 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
-/*
- * Return physical address for virtual address
- */
-static inline void *load_real_addr(void *addr)
-{
- unsigned long real_addr;
-
- asm volatile(
- " lra %0,0(%1)\n"
- " jz 0f\n"
- " la %0,0\n"
- "0:"
- : "=a" (real_addr) : "a" (addr) : "cc");
- return (void *)real_addr;
-}
-
-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, void *src, size_t count)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long from, len;
- void *ra;
- int rc;
+ size_t len, copied, res = 0;
while (count) {
- from = __pa(src);
- if (!oldmem_data.start && from < sclp.hsa_size) {
+ if (!oldmem_data.start && src < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - from);
- rc = memcpy_hsa_kernel(dst, from, len);
- if (rc)
- return rc;
+ len = min(count, sclp.hsa_size - src);
+ copied = memcpy_hsa_iter(iter, src, len);
} else {
/* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
- from -= oldmem_data.start;
- len = min(count, oldmem_data.size - from);
- } else if (oldmem_data.start && from < oldmem_data.size) {
- len = min(count, oldmem_data.size - from);
- from += oldmem_data.start;
+ if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
+ src -= oldmem_data.start;
+ len = min(count, oldmem_data.size - src);
+ } else if (oldmem_data.start && src < oldmem_data.size) {
+ len = min(count, oldmem_data.size - src);
+ src += oldmem_data.start;
} else {
len = count;
}
- if (is_vmalloc_or_module_addr(dst)) {
- ra = load_real_addr(dst);
- len = min(PAGE_SIZE - offset_in_page(ra), len);
- } else {
- ra = dst;
- }
- if (memcpy_real(ra, (void *) from, len))
- return -EFAULT;
+ copied = memcpy_real_iter(iter, src, len);
}
- dst += len;
- src += len;
- count -= len;
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- return 0;
+ return res;
}
-/*
- * Copy memory of the old, dumped system to a user space virtual address
- */
-static int copy_oldmem_user(void __user *dst, void *src, size_t count)
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
{
- unsigned long from, len;
- int rc;
+ struct iov_iter iter;
+ struct kvec kvec;
- while (count) {
- from = __pa(src);
- if (!oldmem_data.start && from < sclp.hsa_size) {
- /* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - from);
- rc = memcpy_hsa_user(dst, from, len);
- if (rc)
- return rc;
- } else {
- /* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && from - oldmem_data.start < oldmem_data.size) {
- from -= oldmem_data.start;
- len = min(count, oldmem_data.size - from);
- } else if (oldmem_data.start && from < oldmem_data.size) {
- len = min(count, oldmem_data.size - from);
- from += oldmem_data.start;
- } else {
- len = count;
- }
- rc = copy_to_user_real(dst, (void *) from, count);
- if (rc)
- return rc;
- }
- dst += len;
- src += len;
- count -= len;
- }
+ kvec.iov_base = dst;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (copy_oldmem_iter(&iter, src, count) < count)
+ return -EFAULT;
return 0;
}
/*
* Copy one page from "oldmem"
*/
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
- unsigned long offset, int userbuf)
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
+ unsigned long offset)
{
- void *src;
- int rc;
+ unsigned long src;
- if (!csize)
- return 0;
- src = (void *) (pfn << PAGE_SHIFT) + offset;
- if (userbuf)
- rc = copy_oldmem_user((void __force __user *) buf, src, csize);
- else
- rc = copy_oldmem_kernel((void *) buf, src, csize);
- return rc;
+ src = pfn_to_phys(pfn) + offset;
+ return copy_oldmem_iter(iter, src, csize);
}
/*
@@ -429,10 +370,10 @@ static void *nt_prpsinfo(void *ptr)
static void *get_vmcoreinfo_old(unsigned long *size)
{
char nt_name[11], *vmcoreinfo;
+ unsigned long addr;
Elf64_Nhdr note;
- void *addr;
- if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+ if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr)))
return NULL;
memset(nt_name, 0, sizeof(nt_name));
if (copy_oldmem_kernel(&note, addr, sizeof(note)))
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4331c7e6e1c0..d7a82066a638 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -250,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strlcpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name, sizeof(rc->name));
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 76a656b2146f..a778714e4d8b 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -11,6 +11,7 @@
#include <linux/cpu.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/trace/diag.h>
#include <asm/sections.h>
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index ec5515423f17..90bbb4ea1d08 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -278,6 +278,7 @@ static const unsigned char formats[][6] = {
[INSTR_SIL_RDI] = { D_20, B_16, I16_32, 0, 0, 0 },
[INSTR_SIL_RDU] = { D_20, B_16, U16_32, 0, 0, 0 },
[INSTR_SIY_IRD] = { D20_20, B_16, I8_8, 0, 0, 0 },
+ [INSTR_SIY_RD] = { D20_20, B_16, 0, 0, 0, 0 },
[INSTR_SIY_URD] = { D20_20, B_16, U8_8, 0, 0, 0 },
[INSTR_SI_RD] = { D_20, B_16, 0, 0, 0, 0 },
[INSTR_SI_URD] = { D_20, B_16, U8_8, 0, 0, 0 },
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 0681c55e831d..1e3233eb510a 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -224,5 +224,5 @@ void __noreturn die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception: panic_on_oops");
oops_exit();
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 3cdf68c53614..6030fdd6997b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -2,7 +2,6 @@
/*
* Copyright IBM Corp. 2007, 2009
* Author(s): Hongjie Yang <hongjie@us.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#define KMSG_COMPONENT "setup"
@@ -18,6 +17,7 @@
#include <linux/pfn.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
@@ -149,34 +149,21 @@ static __init void setup_topology(void)
topology_max_mnest = max_mnest;
}
-static void early_pgm_check_handler(void)
+void __do_early_pgm_check(struct pt_regs *regs)
{
- const struct exception_table_entry *fixup;
- unsigned long cr0, cr0_new;
- unsigned long addr;
-
- addr = S390_lowcore.program_old_psw.addr;
- fixup = s390_search_extables(addr);
- if (!fixup)
+ if (!fixup_exception(regs))
disabled_wait();
- /* Disable low address protection before storing into lowcore. */
- __ctl_store(cr0, 0, 0);
- cr0_new = cr0 & ~(1UL << 28);
- __ctl_load(cr0_new, 0, 0);
- S390_lowcore.program_old_psw.addr = extable_fixup(fixup);
- __ctl_load(cr0, 0, 0);
}
static noinline __init void setup_lowcore_early(void)
{
psw_t psw;
- psw.addr = (unsigned long)s390_base_pgm_handler;
+ psw.addr = (unsigned long)early_pgm_check_handler;
psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
if (IS_ENABLED(CONFIG_KASAN))
psw.mask |= PSW_MASK_DAT;
S390_lowcore.program_new_psw = psw;
- s390_base_pgm_handler_fn = early_pgm_check_handler;
S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
}
@@ -280,7 +267,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
/* copy arch command line */
- strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
}
static void __init check_image_bootable(void)
@@ -294,6 +281,11 @@ static void __init check_image_bootable(void)
disabled_wait();
}
+static void __init sort_amode31_extable(void)
+{
+ sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table);
+}
+
void __init startup_init(void)
{
sclp_early_adjust_va();
@@ -302,6 +294,7 @@ void __init startup_init(void)
time_early_init();
init_kernel_storage_key();
lockdep_off();
+ sort_amode31_extable();
setup_lowcore_early();
setup_facility_list();
detect_machine_type();
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
new file mode 100644
index 000000000000..f521c6da37b8
--- /dev/null
+++ b/arch/s390/kernel/earlypgm.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2006, 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+
+ENTRY(early_pgm_check_handler)
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ lgr %r2,%r11
+ brasl %r14,__do_early_pgm_check
+ mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ lpswe __LC_RETURN_PSW
+ENDPROC(early_pgm_check_handler)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 01bae1d51113..d2a1f2f4f5b8 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -6,11 +6,11 @@
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Hartmut Penner (hp@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
- * Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/asm-extable.h>
#include <asm/alternative-asm.h>
#include <asm/processor.h>
#include <asm/cache.h>
@@ -29,23 +29,6 @@
#include <asm/export.h>
#include <asm/nospec-insn.h>
-__PT_R0 = __PT_GPRS
-__PT_R1 = __PT_GPRS + 8
-__PT_R2 = __PT_GPRS + 16
-__PT_R3 = __PT_GPRS + 24
-__PT_R4 = __PT_GPRS + 32
-__PT_R5 = __PT_GPRS + 40
-__PT_R6 = __PT_GPRS + 48
-__PT_R7 = __PT_GPRS + 56
-__PT_R8 = __PT_GPRS + 64
-__PT_R9 = __PT_GPRS + 72
-__PT_R10 = __PT_GPRS + 80
-__PT_R11 = __PT_GPRS + 88
-__PT_R12 = __PT_GPRS + 96
-__PT_R13 = __PT_GPRS + 104
-__PT_R14 = __PT_GPRS + 112
-__PT_R15 = __PT_GPRS + 120
-
STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER
STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
@@ -53,19 +36,19 @@ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
.endm
.macro LBEAR address
- ALTERNATIVE "", ".insn s,0xb2000000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
.endm
.macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193
+ ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
.endm
.macro MBEAR reg
- ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
.endm
.macro CHECK_STACK savearea
@@ -98,11 +81,6 @@ _LPP_OFFSET = __LC_LPP
#endif
.endm
- .macro STCK savearea
- ALTERNATIVE ".insn s,0xb2050000,\savearea", \
- ".insn s,0xb27c0000,\savearea", 25
- .endm
-
/*
* The TSTMSK macro generates a test-under-mask instruction by
* calculating the memory offset for the specified mask value.
@@ -126,22 +104,22 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "", ".long 0xb2e8c000", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
.endm
.macro BPON
- ALTERNATIVE "", ".long 0xb2e8d000", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
.endm
.macro BPENTER tif_ptr,tif_mask
- ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \
- "", 82
+ ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
+ "j .+12; nop; nop", 82
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
- ALTERNATIVE "jz .+8; .long 0xb2e8c000", \
- "jnz .+8; .long 0xb2e8d000", 82
+ ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
.endm
/*
@@ -177,9 +155,19 @@ _LPP_OFFSET = __LC_LPP
lgr %r14,\reg
larl %r13,\start
slgr %r14,%r13
- lghi %r13,\end - \start
- clgr %r14,%r13
+#ifdef CONFIG_AS_IS_LLVM
+ clgfrl %r14,.Lrange_size\@
+#else
+ clgfi %r14,\end - \start
+#endif
jhe \outside_label
+#ifdef CONFIG_AS_IS_LLVM
+ .section .rodata, "a"
+ .align 4
+.Lrange_size\@:
+ .long \end - \start
+ .previous
+#endif
.endm
.macro SIEEXIT
@@ -191,7 +179,6 @@ _LPP_OFFSET = __LC_LPP
#endif
GEN_BR_THUNK %r14
- GEN_BR_THUNK %r14,%r13
.section .kprobes.text, "ax"
.Ldummy:
@@ -232,7 +219,7 @@ ENTRY(__switch_to)
aghi %r3,__TASK_pid
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
ENDPROC(__switch_to)
@@ -264,6 +251,10 @@ ENTRY(sie64a)
BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
+# Let the next instruction be NOP to avoid triggering a machine check
+# and handling it in a guest as result of the instruction execution.
+ nopr 7
+.Lsie_leave:
BPOFF
BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
@@ -443,7 +434,7 @@ ENDPROC(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
ENTRY(\name)
- STCK __LC_INT_CLOCK
+ stckf __LC_INT_CLOCK
stpt __LC_SYS_ENTER_TIMER
STBEAR __LC_LAST_BREAK
BPOFF
@@ -479,10 +470,7 @@ ENTRY(\name)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
MBEAR %r11
stmg %r8,%r9,__PT_PSW(%r11)
- tm %r8,0x0001 # coming from user space?
- jno 1f
- lctlg %c1,%c1,__LC_KERNEL_ASCE
-1: lgr %r2,%r11 # pass pointer to pt_regs
+ lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
@@ -515,7 +503,7 @@ ENTRY(psw_idle)
.Lpsw_idle_stcctm:
oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
BPON
- STCK __CLOCK_IDLE_ENTER(%r2)
+ stckf __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
lpswe __SF_EMPTY(%r15)
.globl psw_idle_exit
@@ -527,7 +515,7 @@ ENDPROC(psw_idle)
* Machine check handler routines
*/
ENTRY(mcck_int_handler)
- STCK __LC_MCCK_CLOCK
+ stckf __LC_MCCK_CLOCK
BPOFF
la %r1,4095 # validate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
@@ -563,7 +551,7 @@ ENTRY(mcck_int_handler)
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f
- OUTSIDE %r9,.Lsie_entry,.Lsie_skip,4f
+ OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
j 5f
4: CHKSTG .Lmcck_panic
@@ -608,6 +596,7 @@ ENTRY(mcck_int_handler)
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r2,%r11
lgr %r15,%r1
brasl %r14,s390_handle_mcck
.Lmcck_return:
@@ -618,7 +607,7 @@ ENTRY(mcck_int_handler)
jno 0f
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -654,7 +643,7 @@ ENTRY(mcck_int_handler)
ENDPROC(mcck_int_handler)
ENTRY(restart_int_handler)
- ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 6083090be1f4..995ec7449feb 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -5,6 +5,7 @@
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/signal.h>
+#include <asm/extable.h>
#include <asm/ptrace.h>
#include <asm/idle.h>
@@ -16,10 +17,12 @@ void ext_int_handler(void);
void io_int_handler(void);
void mcck_int_handler(void);
void restart_int_handler(void);
+void early_pgm_check_handler(void);
void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
void __do_pgm_check(struct pt_regs *regs);
void __do_syscall(struct pt_regs *regs, int per_trap);
+void __do_early_pgm_check(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 5510c7d10ddc..416b5a94353d 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -4,8 +4,7 @@
*
* Copyright IBM Corp. 2009,2014
*
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
#include <linux/moduleloader.h>
@@ -61,18 +60,9 @@ asm(
#ifdef CONFIG_EXPOLINE
asm(
" .align 16\n"
- "ftrace_shared_hotpatch_trampoline_ex:\n"
- " lmg %r0,%r1,2(%r1)\n"
- " ex %r0," __stringify(__LC_BR_R1) "(%r0)\n"
- " j .\n"
- "ftrace_shared_hotpatch_trampoline_ex_end:\n"
-);
-
-asm(
- " .align 16\n"
"ftrace_shared_hotpatch_trampoline_exrl:\n"
" lmg %r0,%r1,2(%r1)\n"
- " .insn ril,0xc60000000000,%r0,0f\n" /* exrl */
+ " exrl %r0,0f\n"
" j .\n"
"0: br %r1\n"
"ftrace_shared_hotpatch_trampoline_exrl_end:\n"
@@ -91,12 +81,8 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
tend = ftrace_shared_hotpatch_trampoline_br_end;
#ifdef CONFIG_EXPOLINE
if (!nospec_disable) {
- tstart = ftrace_shared_hotpatch_trampoline_ex;
- tend = ftrace_shared_hotpatch_trampoline_ex_end;
- if (test_facility(35)) { /* exrl */
- tstart = ftrace_shared_hotpatch_trampoline_exrl;
- tend = ftrace_shared_hotpatch_trampoline_exrl_end;
- }
+ tstart = ftrace_shared_hotpatch_trampoline_exrl;
+ tend = ftrace_shared_hotpatch_trampoline_exrl_end;
}
#endif /* CONFIG_EXPOLINE */
if (end)
@@ -159,37 +145,73 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
return 0;
}
+static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec)
+{
+ struct ftrace_hotpatch_trampoline *trampoline;
+ struct ftrace_insn insn;
+ s64 disp;
+ u16 opc;
+
+ if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn)))
+ return ERR_PTR(-EFAULT);
+ disp = (s64)insn.disp * 2;
+ trampoline = (void *)(rec->ip + disp);
+ if (get_kernel_nofault(opc, &trampoline->brasl_opc))
+ return ERR_PTR(-EFAULT);
+ if (opc != 0xc015)
+ return ERR_PTR(-EINVAL);
+ return trampoline;
+}
+
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
+ struct ftrace_hotpatch_trampoline *trampoline;
+ u64 old;
+
+ trampoline = ftrace_get_trampoline(rec);
+ if (IS_ERR(trampoline))
+ return PTR_ERR(trampoline);
+ if (get_kernel_nofault(old, &trampoline->interceptor))
+ return -EFAULT;
+ if (old != old_addr)
+ return -EINVAL;
+ s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
return 0;
}
-static void brcl_disable(void *brcl)
+static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
{
- u8 op = 0x04; /* set mask field to zero */
+ u16 old;
+ u8 op;
- s390_kernel_write((char *)brcl + 1, &op, sizeof(op));
+ if (get_kernel_nofault(old, addr))
+ return -EFAULT;
+ if (old != expected)
+ return -EINVAL;
+ /* set mask field to all ones or zeroes */
+ op = enable ? 0xf4 : 0x04;
+ s390_kernel_write((char *)addr + 1, &op, sizeof(op));
+ return 0;
}
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- brcl_disable((void *)rec->ip);
- return 0;
-}
-
-static void brcl_enable(void *brcl)
-{
- u8 op = 0xf4; /* set mask field to all ones */
-
- s390_kernel_write((char *)brcl + 1, &op, sizeof(op));
+ /* Expect brcl 0xf,... */
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- brcl_enable((void *)rec->ip);
- return 0;
+ struct ftrace_hotpatch_trampoline *trampoline;
+
+ trampoline = ftrace_get_trampoline(rec);
+ if (IS_ERR(trampoline))
+ return PTR_ERR(trampoline);
+ s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr));
+ /* Expect brcl 0x0,... */
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
}
int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -203,14 +225,13 @@ void arch_ftrace_update_code(int command)
ftrace_modify_all_code(command);
}
-int ftrace_arch_code_modify_post_process(void)
+void ftrace_arch_code_modify_post_process(void)
{
/*
* Flush any pre-fetched instructions on all
* CPUs to make the new code visible.
*/
text_poke_sync_lock();
- return 0;
}
#ifdef CONFIG_MODULES
@@ -262,14 +283,24 @@ NOKPROBE_SYMBOL(prepare_ftrace_return);
*/
int ftrace_enable_ftrace_graph_caller(void)
{
- brcl_disable(ftrace_graph_caller);
+ int rc;
+
+ /* Expect brc 0xf,... */
+ rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
+ if (rc)
+ return rc;
text_poke_sync_lock();
return 0;
}
int ftrace_disable_ftrace_graph_caller(void)
{
- brcl_enable(ftrace_graph_caller);
+ int rc;
+
+ /* Expect brc 0x0,... */
+ rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
+ if (rc)
+ return rc;
text_poke_sync_lock();
return 0;
}
@@ -290,9 +321,8 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
return;
regs = ftrace_get_regs(fregs);
- preempt_disable_notrace();
p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
+ if (!regs || unlikely(!p) || kprobe_disabled(p))
goto out;
if (kprobe_running()) {
@@ -318,7 +348,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
}
__this_cpu_write(current_kprobe, NULL);
out:
- preempt_enable_notrace();
ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 69e416f4c6b0..7f75a9616406 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -16,8 +16,6 @@ extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[];
extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[];
extern const char ftrace_shared_hotpatch_trampoline_br[];
extern const char ftrace_shared_hotpatch_trampoline_br_end[];
-extern const char ftrace_shared_hotpatch_trampoline_ex[];
-extern const char ftrace_shared_hotpatch_trampoline_ex_end[];
extern const char ftrace_shared_hotpatch_trampoline_exrl[];
extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
extern const char ftrace_plt_template[];
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 42f9a325a257..d7b8b6ad574d 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -5,7 +5,6 @@
* Author(s): Hartmut Penner <hp@de.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
* Rob van der Heij <rvdhei@iae.nl>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
*
*/
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 5ad1dde23dc5..325cbf69ebbd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -4,7 +4,6 @@
*
* Copyright IBM Corp. 2005, 2012
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
* Volker Sameske <sameske@de.ibm.com>
*/
@@ -20,6 +19,7 @@
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/ipl.h>
#include <asm/smp.h>
@@ -29,6 +29,7 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
@@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- mem_assign_absolute(S390_lowcore.ipib, ipib);
- mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->ipib = ipib;
+ abs_lc->ipib_checksum = csum;
+ put_abs_lowcore(abs_lc, flags);
dump_run(trigger);
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 0df83ecaa2e0..45393919fe61 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -138,7 +138,7 @@ void noinstr do_io_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
int from_idle;
- irq_enter();
+ irq_enter_rcu();
if (user_mode(regs)) {
update_timer_sys();
@@ -158,7 +158,8 @@ void noinstr do_io_irq(struct pt_regs *regs)
do_irq_async(regs, IO_INTERRUPT);
} while (MACHINE_IS_LPAR && irq_pending(regs));
- irq_exit();
+ irq_exit_rcu();
+
set_irq_regs(old_regs);
irqentry_exit(regs, state);
@@ -172,7 +173,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
int from_idle;
- irq_enter();
+ irq_enter_rcu();
if (user_mode(regs)) {
update_timer_sys();
@@ -190,7 +191,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
do_irq_async(regs, EXT_INTERRUPT);
- irq_exit();
+ irq_exit_rcu();
set_irq_regs(old_regs);
irqentry_exit(regs, state);
@@ -204,7 +205,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
unsigned long flags;
int cpu;
- irq_lock_sparse();
+ rcu_read_lock();
desc = irq_to_desc(irq);
if (!desc)
goto out;
@@ -223,7 +224,7 @@ static void show_msi_interrupt(struct seq_file *p, int irq)
seq_putc(p, '\n');
raw_spin_unlock_irqrestore(&desc->lock, flags);
out:
- irq_unlock_sparse();
+ rcu_read_unlock();
}
/*
@@ -341,7 +342,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy)
struct ext_int_info *p;
int index;
- ext_code = *(struct ext_code *) &regs->int_code;
+ ext_code.int_code = regs->int_code;
if (ext_code.code != EXT_IRQ_CLK_COMP)
set_cpu_flag(CIF_NOHZ_DELAY);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 6bec000c6c1c..e808bb8bc0da 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,14 +44,8 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
panic("Corrupted kernel text");
}
-static struct insn orignop = {
- .opcode = 0xc004,
- .offset = JUMP_LABEL_NOP_OFFSET >> 1,
-};
-
static void jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+ enum jump_label_type type)
{
void *code = (void *)jump_entry_code(entry);
struct insn old, new;
@@ -63,27 +57,22 @@ static void jump_label_transform(struct jump_entry *entry,
jump_label_make_branch(entry, &old);
jump_label_make_nop(entry, &new);
}
- if (init) {
- if (memcmp(code, &orignop, sizeof(orignop)))
- jump_label_bug(entry, &orignop, &new);
- } else {
- if (memcmp(code, &old, sizeof(old)))
- jump_label_bug(entry, &old, &new);
- }
+ if (memcmp(code, &old, sizeof(old)))
+ jump_label_bug(entry, &old, &new);
s390_kernel_write(code, &new, sizeof(new));
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- jump_label_transform(entry, type, 0);
+ jump_label_transform(entry, type);
text_poke_sync();
}
bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type)
{
- jump_label_transform(entry, type, 0);
+ jump_label_transform(entry, type);
return true;
}
@@ -91,10 +80,3 @@ void arch_jump_label_transform_apply(void)
{
text_poke_sync();
}
-
-void __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
- enum jump_label_type type)
-{
- jump_label_transform(entry, type, 1);
- text_poke_sync();
-}
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index e27a7d3b0364..0032bdbe8e3f 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -284,11 +284,11 @@ NOKPROBE_SYMBOL(pop_kprobe);
void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
- ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
- ri->fp = NULL;
+ ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14];
+ ri->fp = (void *)regs->gprs[15];
/* Replace the return addr with trampoline addr */
- regs->gprs[14] = (unsigned long) &__kretprobe_trampoline;
+ regs->gprs[14] = (unsigned long)&__kretprobe_trampoline;
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
@@ -372,33 +372,26 @@ static int kprobe_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kprobe_handler);
-/*
- * Function return probe trampoline:
- * - init_kprobes() establishes a probepoint here
- * - When the probed function returns, this probe
- * causes the handlers to fire
- */
-static void __used kretprobe_trampoline_holder(void)
+void arch_kretprobe_fixup_return(struct pt_regs *regs,
+ kprobe_opcode_t *correct_ret_addr)
{
- asm volatile(".global __kretprobe_trampoline\n"
- "__kretprobe_trampoline: bcr 0,0\n");
+ /* Replace fake return address with real one. */
+ regs->gprs[14] = (unsigned long)correct_ret_addr;
}
+NOKPROBE_SYMBOL(arch_kretprobe_fixup_return);
/*
- * Called when the probe at kretprobe trampoline is hit
+ * Called from __kretprobe_trampoline
*/
-static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+void trampoline_probe_handler(struct pt_regs *regs)
{
- regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL);
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+ kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]);
}
NOKPROBE_SYMBOL(trampoline_probe_handler);
+/* assembler function that handles the kretprobes must not be probed itself */
+NOKPROBE_SYMBOL(__kretprobe_trampoline);
+
/*
* Called after single-stepping. p->addr is the address of the
* instruction whose first byte has been replaced by the "breakpoint"
@@ -465,7 +458,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
struct kprobe *p = kprobe_running();
- const struct exception_table_entry *entry;
switch(kcb->kprobe_status) {
case KPROBE_HIT_SS:
@@ -487,10 +479,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- entry = s390_search_extables(regs->psw.addr);
- if (entry && ex_handle(entry, regs))
+ if (fixup_exception(regs))
return 1;
-
/*
* fixup_exception() could not handle it,
* Let do_page_fault() fix it.
@@ -554,18 +544,13 @@ int kprobe_exceptions_notify(struct notifier_block *self,
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-static struct kprobe trampoline = {
- .addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
int __init arch_init_kprobes(void)
{
- return register_kprobe(&trampoline);
+ return 0;
}
int arch_trampoline_kprobe(struct kprobe *p)
{
- return p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline;
+ return 0;
}
NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 3b895971c3d0..6652e54cf3db 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -88,8 +88,7 @@ static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
if (stsi(si, 2, 2, 2))
return;
cpascii(lgr_info->name, si->name, sizeof(si->name));
- memcpy(&lgr_info->lpar_number, &si->lpar_number,
- sizeof(lgr_info->lpar_number));
+ lgr_info->lpar_number = si->lpar_number;
}
/*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 0505e55a6297..4579b42286d5 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -3,7 +3,6 @@
* Copyright IBM Corp. 2005, 2011
*
* Author(s): Rolf Adelsberger,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
* Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
@@ -22,13 +21,16 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/nmi.h>
+#include <asm/sclp.h>
-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long,
+ unsigned long);
extern const unsigned char relocate_kernel[];
extern const unsigned long long relocate_kernel_len;
@@ -55,7 +57,7 @@ static void __do_machine_kdump(void *image)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
- memcpy((void *) __LC_FPREGS_SAVE_AREA,
+ memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
(void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
@@ -86,7 +88,7 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
- mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (MACHINE_HAS_VX)
save_vx_regs((__vector128 *) mcesa->vector_save_area);
if (MACHINE_HAS_GS) {
@@ -221,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ struct lowcore *abs_lc;
+ unsigned long flags;
+
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+ put_abs_lowcore(abs_lc, flags);
}
void machine_shutdown(void)
@@ -244,6 +251,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
*/
static void __do_machine_kexec(void *data)
{
+ unsigned long diag308_subcode;
relocate_kernel_t data_mover;
struct kimage *image = data;
@@ -252,7 +260,10 @@ static void __do_machine_kexec(void *data)
__arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
/* Call the moving routine */
- (*data_mover)(&image->head, image->start);
+ diag308_subcode = DIAG308_CLEAR_RESET;
+ if (sclp.has_iplcc)
+ diag308_subcode |= DIAG308_FLAG_EI;
+ (*data_mover)(&image->head, image->start, diag308_subcode);
/* Die if kexec returns */
disabled_wait();
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 9975ad200d74..fc6d5f58debe 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -7,6 +7,8 @@
* Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com>
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/kexec.h>
@@ -29,6 +31,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
struct module_signature *ms;
unsigned long sig_len;
+ int ret;
/* Skip signature verification when not secure IPLed. */
if (!ipl_secure_flag)
@@ -63,11 +66,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
return -EBADMSG;
}
- return verify_pkcs7_signature(kernel, kernel_len,
- kernel + kernel_len, sig_len,
- VERIFY_USE_PLATFORM_KEYRING,
- VERIFYING_MODULE_SIGNATURE,
- NULL, NULL);
+ ret = verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
+ ret = verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
+ return ret;
}
#endif /* CONFIG_KEXEC_SIG */
@@ -290,8 +300,16 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
const Elf_Shdr *relsec,
const Elf_Shdr *symtab)
{
+ const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
Elf_Rela *relas;
int i, r_type;
+ int ret;
+
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
relas = (void *)pi->ehdr + relsec->sh_offset;
@@ -304,15 +322,27 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
sym = (void *)pi->ehdr + symtab->sh_offset;
sym += ELF64_R_SYM(relas[i].r_info);
- if (sym->st_shndx == SHN_UNDEF)
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ if (sym->st_shndx == SHN_UNDEF) {
+ pr_err("Undefined symbol: %s\n", name);
return -ENOEXEC;
+ }
- if (sym->st_shndx == SHN_COMMON)
+ if (sym->st_shndx == SHN_COMMON) {
+ pr_err("symbol '%s' in common section\n", name);
return -ENOEXEC;
+ }
if (sym->st_shndx >= pi->ehdr->e_shnum &&
- sym->st_shndx != SHN_ABS)
+ sym->st_shndx != SHN_ABS) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
return -ENOEXEC;
+ }
loc = pi->purgatory_buf;
loc += section->sh_offset;
@@ -326,7 +356,15 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
addr = section->sh_addr + relas[i].r_offset;
r_type = ELF64_R_TYPE(relas[i].r_info);
- arch_kexec_do_relocs(r_type, loc, val, addr);
+
+ if (r_type == R_390_PLT32DBL)
+ r_type = R_390_PC32DBL;
+
+ ret = arch_kexec_do_relocs(r_type, loc, val, addr);
+ if (ret) {
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
}
return 0;
}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 39bcc0e39a10..4786bfe02144 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -2,8 +2,6 @@
/*
* Copyright IBM Corp. 2008, 2009
*
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
*/
#include <linux/linkage.h>
@@ -13,6 +11,18 @@
#include <asm/ptrace.h>
#include <asm/export.h>
+
+#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2)
+#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS)
+/* packed stack: allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE 24
+
+#ifdef CONFIG_FUNCTION_TRACER
+
GEN_BR_THUNK %r1
GEN_BR_THUNK %r14
@@ -22,25 +32,14 @@ ENTRY(ftrace_stub)
BR_EX %r14
ENDPROC(ftrace_stub)
-#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
-#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
-#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
-#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2)
-#ifdef __PACK_STACK
-/* allocate just enough for r14, r15 and backchain */
-#define TRACED_FUNC_FRAME_SIZE 24
-#else
-#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
-#endif
-
.macro ftrace_regs_entry, allregs=0
stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
.if \allregs == 1
- lghi %r14,0 # save condition code
- ipm %r14 # don't put any instructions
- sllg %r14,%r14,16 # clobbering CC before this point
+ # save psw mask
+ # don't put any instructions clobbering CC before this point
+ epsw %r1,%r14
+ risbg %r14,%r1,0,31,32
.endif
lgr %r1,%r15
@@ -56,7 +55,9 @@ ENDPROC(ftrace_stub)
.if \allregs == 1
stg %r14,(STACK_PTREGS_PSW)(%r15)
- stosm (STACK_PTREGS_PSW)(%r15),0
+ mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+ .else
+ xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15)
.endif
lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address
@@ -132,3 +133,35 @@ SYM_FUNC_START(return_to_handler)
SYM_FUNC_END(return_to_handler)
#endif
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_KPROBES
+
+SYM_FUNC_START(__kretprobe_trampoline)
+
+ stg %r14,(__SF_GPRS+8*8)(%r15)
+ lay %r15,-STACK_FRAME_SIZE(%r15)
+ stmg %r0,%r14,STACK_PTREGS_GPRS(%r15)
+
+ # store original stack pointer in backchain and pt_regs
+ lay %r7,STACK_FRAME_SIZE(%r15)
+ stg %r7,__SF_BACKCHAIN(%r15)
+ stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15)
+
+ # store full psw
+ epsw %r2,%r3
+ risbg %r3,%r2,0,31,32
+ stg %r3,STACK_PTREGS_PSW(%r15)
+ larl %r1,__kretprobe_trampoline
+ stg %r1,STACK_PTREGS_PSW+8(%r15)
+
+ lay %r2,STACK_PTREGS(%r15)
+ brasl %r14,trampoline_probe_handler
+
+ mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
+ lmg %r0,%r15,STACK_PTREGS_GPRS(%r15)
+ lpswe __SF_EMPTY(%r15)
+
+SYM_FUNC_END(__kretprobe_trampoline)
+
+#endif /* CONFIG_KPROBES */
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index b01ba460b7ca..2d159b32885b 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -33,18 +33,19 @@
#define DEBUGP(fmt , ...)
#endif
-#define PLT_ENTRY_SIZE 20
+#define PLT_ENTRY_SIZE 22
void *module_alloc(unsigned long size)
{
+ gfp_t gfp_mask = GFP_KERNEL;
void *p;
if (PAGE_ALIGN(size) > MODULES_LEN)
return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
- if (p && (kasan_module_alloc(p, size) < 0)) {
+ if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
}
@@ -340,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int insn[5];
- unsigned int *ip = me->core_layout.base +
- me->arch.plt_offset +
- info->plt_offset;
-
- insn[0] = 0x0d10e310; /* basr 1,0 */
- insn[1] = 0x100a0004; /* lg 1,10(1) */
+ unsigned char insn[PLT_ENTRY_SIZE];
+ char *plt_base;
+ char *ip;
+
+ plt_base = me->core_layout.base + me->arch.plt_offset;
+ ip = plt_base + info->plt_offset;
+ *(int *)insn = 0x0d10e310; /* basr 1,0 */
+ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
- unsigned int *ij;
- ij = me->core_layout.base +
- me->arch.plt_offset +
- me->arch.plt_size - PLT_ENTRY_SIZE;
- insn[2] = 0xa7f40000 + /* j __jump_r1 */
- (unsigned int)(u16)
- (((unsigned long) ij - 8 -
- (unsigned long) ip) / 2);
+ char *jump_r1;
+
+ jump_r1 = plt_base + me->arch.plt_size -
+ PLT_ENTRY_SIZE;
+ /* brcl 0xf,__jump_r1 */
+ *(short *)&insn[8] = 0xc0f4;
+ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
} else {
- insn[2] = 0x07f10000; /* br %r1 */
+ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
}
- insn[3] = (unsigned int) (val >> 32);
- insn[4] = (unsigned int) val;
+ *(long *)&insn[14] = val;
write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
@@ -517,15 +517,9 @@ int module_finalize(const Elf_Ehdr *hdr,
ij = me->core_layout.base + me->arch.plt_offset +
me->arch.plt_size - PLT_ENTRY_SIZE;
- if (test_facility(35)) {
- ij[0] = 0xc6000000; /* exrl %r0,.+10 */
- ij[1] = 0x0005a7f4; /* j . */
- ij[2] = 0x000007f1; /* br %r1 */
- } else {
- ij[0] = 0x44000000 | (unsigned int)
- offsetof(struct lowcore, br_r1_trampoline);
- ij[1] = 0xa7f40000; /* j . */
- }
+ ij[0] = 0xc6000000; /* exrl %r0,.+10 */
+ ij[1] = 0x0005a7f4; /* j . */
+ ij[2] = 0x000007f1; /* br %r1 */
}
secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
@@ -554,6 +548,5 @@ int module_finalize(const Elf_Ehdr *hdr,
#endif /* CONFIG_FUNCTION_TRACER */
}
- jump_label_apply_nops(me);
return 0;
}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 20f8e1868853..31cb9b00a36b 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -6,12 +6,12 @@
* Author(s): Ingo Adlung <adlung@de.ibm.com>,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
* Cornelia Huck <cornelia.huck@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/entry-common.h>
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
@@ -30,6 +30,8 @@
#include <asm/switch_to.h>
#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
+#include <asm/pai.h>
+
#include <linux/kvm_host.h>
struct mcck_struct {
@@ -58,27 +60,27 @@ static inline unsigned long nmi_get_mcesa_size(void)
/*
* The initial machine check extended save area for the boot CPU.
- * It will be replaced by nmi_init() with an allocated structure.
- * The structure is required for machine check happening early in
- * the boot process.
+ * It will be replaced on the boot CPU reinit with an allocated
+ * structure. The structure is required for machine check happening
+ * early in the boot process.
*/
-static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
+static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
-void __init nmi_alloc_boot_cpu(struct lowcore *lc)
+void __init nmi_alloc_mcesa_early(u64 *mcesad)
{
if (!nmi_needs_mcesa())
return;
- lc->mcesad = (unsigned long) &boot_mcesa;
+ *mcesad = __pa(&boot_mcesa);
if (MACHINE_HAS_GS)
- lc->mcesad |= ilog2(MCESA_MAX_SIZE);
+ *mcesad |= ilog2(MCESA_MAX_SIZE);
}
-static int __init nmi_init(void)
+static void __init nmi_alloc_cache(void)
{
- unsigned long origin, cr0, size;
+ unsigned long size;
if (!nmi_needs_mcesa())
- return 0;
+ return;
size = nmi_get_mcesa_size();
if (size > MCESA_MIN_SIZE)
mcesa_origin_lc = ilog2(size);
@@ -86,40 +88,31 @@ static int __init nmi_init(void)
mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
if (!mcesa_cache)
panic("Couldn't create nmi save area cache");
- origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
- if (!origin)
- panic("Couldn't allocate nmi save area");
- /* The pointer is stored with mcesa_bits ORed in */
- kmemleak_not_leak((void *) origin);
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28); /* disable lowcore protection */
- /* Replace boot_mcesa on the boot CPU */
- S390_lowcore.mcesad = origin | mcesa_origin_lc;
- __ctl_load(cr0, 0, 0);
- return 0;
}
-early_initcall(nmi_init);
-int nmi_alloc_per_cpu(struct lowcore *lc)
+int __ref nmi_alloc_mcesa(u64 *mcesad)
{
unsigned long origin;
+ *mcesad = 0;
if (!nmi_needs_mcesa())
return 0;
+ if (!mcesa_cache)
+ nmi_alloc_cache();
origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
if (!origin)
return -ENOMEM;
/* The pointer is stored with mcesa_bits ORed in */
kmemleak_not_leak((void *) origin);
- lc->mcesad = origin | mcesa_origin_lc;
+ *mcesad = __pa(origin) | mcesa_origin_lc;
return 0;
}
-void nmi_free_per_cpu(struct lowcore *lc)
+void nmi_free_mcesa(u64 *mcesad)
{
if (!nmi_needs_mcesa())
return;
- kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
+ kmem_cache_free(mcesa_cache, __va(*mcesad & MCESA_ORIGIN_MASK));
}
static notrace void s390_handle_damage(void)
@@ -175,14 +168,16 @@ void __s390_handle_mcck(void)
"malfunction (code 0x%016lx).\n", mcck.mcck_code);
printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
current->comm, current->pid);
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
}
-void noinstr s390_handle_mcck(void)
+void noinstr s390_handle_mcck(struct pt_regs *regs)
{
trace_hardirqs_off();
+ pai_kernel_enter(regs);
__s390_handle_mcck();
+ pai_kernel_exit(regs);
trace_hardirqs_on();
}
/*
@@ -246,7 +241,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
: "Q" (S390_lowcore.fpt_creg_save_area));
}
- mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (!MACHINE_HAS_VX) {
/* Validate floating point registers */
asm volatile(
@@ -273,7 +268,14 @@ static int notrace s390_validate_registers(union mci mci, int umode)
/* Validate vector registers */
union ctlreg0 cr0;
- if (!mci.vr) {
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
/*
* Vector registers can't be restored. If the kernel
* currently uses vector registers the system is
@@ -316,11 +318,21 @@ static int notrace s390_validate_registers(union mci mci, int umode)
if (cr2.gse) {
if (!mci.gs) {
/*
- * Guarded storage register can't be restored and
- * the current processes uses guarded storage.
- * It has to be terminated.
+ * 2 cases:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of
+ * guarded storage control can not be recreated, the
+ * process must be terminated.
+ * For SIE the guest values of guarded storage can not
+ * be recreated. This is either due to a bug or due to
+ * GS being disabled in the guest. The guest will be
+ * notified by KVM code and the guests machine check
+ * handling must take care of this. The host values
+ * are saved by KVM and are not affected.
*/
- kill_task = 1;
+ if (!test_cpu_flag(CIF_MCCK_GUEST))
+ kill_task = 1;
} else {
load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
}
@@ -386,11 +398,12 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
static unsigned long long last_ipd;
struct mcck_struct *mcck;
unsigned long long tmp;
+ irqentry_state_t irq_state;
union mci mci;
unsigned long mcck_dam_code;
int mcck_pending = 0;
- nmi_enter();
+ irq_state = irqentry_nmi_enter(regs);
if (user_mode(regs))
update_timer_mcck();
@@ -493,14 +506,14 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
clear_cpu_flag(CIF_MCCK_GUEST);
if (user_mode(regs) && mcck_pending) {
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
return 1;
}
if (mcck_pending)
schedule_mcck_handler();
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 60e6fec27bba..717bbcc056e5 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -105,6 +105,7 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
s32 *epo;
/* Second part of the instruction replace is always a nop */
+ memcpy(insnbuf + 2, branch, sizeof(branch));
for (epo = start; epo < end; epo++) {
instr = (u8 *) epo + *epo;
if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
@@ -117,42 +118,20 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
if (thunk[0] == 0xc6 && thunk[1] == 0x00)
/* exrl %r0,<target-br> */
br = thunk + (*(int *)(thunk + 2)) * 2;
- else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 &&
- thunk[6] == 0x44 && thunk[7] == 0x00 &&
- (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 &&
- (thunk[1] & 0xf0) == (thunk[8] & 0xf0))
- /* larl %rx,<target br> + ex %r0,0(%rx) */
- br = thunk + (*(int *)(thunk + 2)) * 2;
else
continue;
- /* Check for unconditional branch 0x07f? or 0x47f???? */
- if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
+ if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
continue;
-
- memcpy(insnbuf + 2, branch, sizeof(branch));
switch (type) {
case BRCL_EXPOLINE:
+ /* brcl to thunk, replace with br + nop */
insnbuf[0] = br[0];
insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
- if (br[0] == 0x47) {
- /* brcl to b, replace with bc + nopr */
- insnbuf[2] = br[2];
- insnbuf[3] = br[3];
- } else {
- /* brcl to br, replace with bcr + nop */
- }
break;
case BRASL_EXPOLINE:
+ /* brasl to thunk, replace with basr + nop */
+ insnbuf[0] = 0x0d;
insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
- if (br[0] == 0x47) {
- /* brasl to b, replace with bas + nopr */
- insnbuf[0] = 0x4d;
- insnbuf[2] = br[2];
- insnbuf[3] = br[3];
- } else {
- /* brasl to br, replace with basr + nop */
- insnbuf[0] = 0x0d;
- }
break;
}
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 51c5a9f6e525..23ab9f02f278 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -33,10 +33,3 @@ void __init numa_setup(void)
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
NODE_DATA(0)->node_id = 0;
}
-
-static int __init numa_init_late(void)
-{
- register_one_node(0);
- return 0;
-}
-arch_initcall(numa_init_late);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 4bef35b79b93..ec0bd9457e90 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,8 +13,10 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/maccess.h>
+#include <asm/asm-offsets.h>
/*
* OS info structure has to be page aligned
@@ -45,7 +47,7 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
*/
void os_info_entry_add(int nr, void *ptr, u64 size)
{
- os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+ os_info.entry[nr].addr = __pa(ptr);
os_info.entry[nr].size = size;
os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
os_info.csum = os_info_csum(&os_info);
@@ -56,13 +58,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
- void *ptr = &os_info;
+ struct lowcore *abs_lc;
+ unsigned long flags;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
- mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->os_info = __pa(&os_info);
+ put_abs_lowcore(abs_lc, flags);
}
#ifdef CONFIG_CRASH_DUMP
@@ -90,7 +95,7 @@ static void os_info_old_alloc(int nr, int align)
goto fail;
}
buf_align = PTR_ALIGN(buf, align);
- if (copy_oldmem_kernel(buf_align, (void *) addr, size)) {
+ if (copy_oldmem_kernel(buf_align, addr, size)) {
msg = "copy failed";
goto fail_free;
}
@@ -123,15 +128,14 @@ static void os_info_old_init(void)
return;
if (!oldmem_data.start)
goto fail;
- if (copy_oldmem_kernel(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
goto fail;
if (addr == 0 || addr % PAGE_SIZE)
goto fail;
os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
if (!os_info_old)
goto fail;
- if (copy_oldmem_kernel(os_info_old, (void *) addr,
- sizeof(*os_info_old)))
+ if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old)))
goto fail_free;
if (os_info_old->magic != OS_INFO_MAGIC)
goto fail_free;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index ee8707abdb6a..f043a7ff220b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -516,6 +516,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
return err;
}
+/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+ * attribute::type values:
+ * - PERF_TYPE_HARDWARE:
+ * - pmu->type:
+ * Handle both type of invocations identical. They address the same hardware.
+ * The result is different when event modifiers exclude_kernel and/or
+ * exclude_user are also set.
+ */
+static int cpumf_pmu_event_type(struct perf_event *event)
+{
+ u64 ev = event->attr.config;
+
+ if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
+ return PERF_TYPE_HARDWARE;
+ return PERF_TYPE_RAW;
+}
+
static int cpumf_pmu_event_init(struct perf_event *event)
{
unsigned int type = event->attr.type;
@@ -525,7 +545,7 @@ static int cpumf_pmu_event_init(struct perf_event *event)
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
/* Registered as unknown PMU */
- err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
else
return -ENOENT;
@@ -644,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
raw.frag.data = cpuhw->stop;
raw.size = raw.frag.size;
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
}
overflow = perf_event_overflow(event, &data, &regs);
@@ -1451,6 +1472,8 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
/* Get the CPU speed, try sampling facility first and CPU attributes second. */
static void cfdiag_get_cpu_speed(void)
{
+ unsigned long mhz;
+
if (cpum_sf_avail()) { /* Sampling facility first */
struct hws_qsi_info_block si;
@@ -1464,12 +1487,9 @@ static void cfdiag_get_cpu_speed(void)
/* Fallback: CPU speed extract static part. Used in case
* CPU Measurement Sampling Facility is turned off.
*/
- if (test_facility(34)) {
- unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
-
- if (mhz != -1UL)
- cfdiag_cpu_speed = mhz & 0xffffffff;
- }
+ mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+ if (mhz != -1UL)
+ cfdiag_cpu_speed = mhz & 0xffffffff;
}
static int cfset_init(void)
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
index 30f0242de4a5..8ee48672233f 100644
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ b/arch/s390/kernel/perf_cpum_cf_common.c
@@ -178,7 +178,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
case CPUMF_CTR_SET_CRYPTO:
if (info->csvn >= 1 && info->csvn <= 5)
ctrset_size = 16;
- else if (info->csvn == 6)
+ else if (info->csvn == 6 || info->csvn == 7)
ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
@@ -188,7 +188,7 @@ size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
ctrset_size = 48;
else if (info->csvn >= 3 && info->csvn <= 5)
ctrset_size = 128;
- else if (info->csvn == 6)
+ else if (info->csvn == 6 || info->csvn == 7)
ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 37265f551a11..0d64aafd158f 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -295,6 +295,76 @@ CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5);
+CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6);
+CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7);
+CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -344,7 +414,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -635,6 +705,80 @@ static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z16, SORTL),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -715,8 +859,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 1 ... 5:
csvn = cpumcf_svn_12345_pmu_event_attr;
break;
- case 6:
- csvn = cpumcf_svn_6_pmu_event_attr;
+ case 6 ... 7:
+ csvn = cpumcf_svn_67_pmu_event_attr;
break;
default:
csvn = none;
@@ -749,6 +893,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 0x8562:
model = cpumcf_z15_pmu_event_attr;
break;
+ case 0x3931:
+ case 0x3932:
+ model = cpumcf_z16_pmu_event_attr;
+ break;
default:
model = none;
break;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index db62def4ef28..332a49965130 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1179,7 +1179,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
sample = (struct hws_basic_entry *) *sdbt;
while ((unsigned long *) sample < (unsigned long *) te) {
/* Check for an empty sample */
- if (!sample->def)
+ if (!sample->def || sample->LS)
break;
/* Update perf event period */
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index ea7729bebaa0..c27321cb0969 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -30,7 +30,7 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
if (!stack)
return NULL;
- return (struct kvm_s390_sie_block *) stack->empty1[0];
+ return (struct kvm_s390_sie_block *)stack->sie_control_block;
}
static bool is_in_guest(struct pt_regs *regs)
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
new file mode 100644
index 000000000000..6826e2a69a21
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Facility
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_crypto"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+static debug_info_t *cfm_dbg;
+static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */
+ /* extracted with QPACI instruction */
+
+DEFINE_STATIC_KEY_FALSE(pai_key);
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+struct paicrypt_map {
+ unsigned long *page; /* Page for CPU to store counters */
+ struct pai_userdata *save; /* Page to store no-zero counters */
+ unsigned int users; /* # of PAI crypto users */
+ unsigned int sampler; /* # of PAI crypto samplers */
+ unsigned int counter; /* # of PAI crypto counters */
+ struct perf_event *event; /* Perf event for sampling */
+};
+
+static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+
+/* Release the PMU if event is the last perf event */
+static DEFINE_MUTEX(pai_reserve_mutex);
+
+/* Adjust usage counters and remove allocated memory when all users are
+ * gone.
+ */
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+
+ cpump->event = NULL;
+ static_branch_dec(&pai_key);
+ mutex_lock(&pai_reserve_mutex);
+ if (event->attr.sample_period)
+ cpump->sampler -= 1;
+ else
+ cpump->counter -= 1;
+ debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d"
+ " sampler %d counter %d\n", __func__,
+ event->attr.config, event->cpu, cpump->sampler,
+ cpump->counter);
+ if (!cpump->counter && !cpump->sampler) {
+ debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
+ __func__, (unsigned long)cpump->page,
+ cpump->save);
+ free_page((unsigned long)cpump->page);
+ cpump->page = NULL;
+ kvfree(cpump->save);
+ cpump->save = NULL;
+ }
+ mutex_unlock(&pai_reserve_mutex);
+}
+
+static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+{
+ if (kernel)
+ nr += PAI_CRYPTO_MAXCTR;
+ return cpump->page[nr];
+}
+
+/* Read the counter values. Return value from location in CMP. For event
+ * CRYPTO_ALL sum up all events.
+ */
+static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ u64 sum = 0;
+ int i;
+
+ if (event->attr.config != PAI_CRYPTO_BASE) {
+ return paicrypt_getctr(cpump,
+ event->attr.config - PAI_CRYPTO_BASE,
+ kernel);
+ }
+
+ for (i = 1; i <= paicrypt_cnt; i++) {
+ u64 val = paicrypt_getctr(cpump, i, kernel);
+
+ if (!val)
+ continue;
+ sum += val;
+ }
+ return sum;
+}
+
+static u64 paicrypt_getall(struct perf_event *event)
+{
+ u64 sum = 0;
+
+ if (!event->attr.exclude_kernel)
+ sum += paicrypt_getdata(event, true);
+ if (!event->attr.exclude_user)
+ sum += paicrypt_getdata(event, false);
+
+ return sum;
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for crypto events
+ *
+ * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is save to block.
+ * When the event initialization functions fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+{
+ unsigned int *use_ptr;
+ int rc = 0;
+
+ mutex_lock(&pai_reserve_mutex);
+ if (a->sample_period) { /* Sampling requested */
+ use_ptr = &cpump->sampler;
+ if (cpump->counter || cpump->sampler)
+ rc = -EBUSY; /* ... sampling/counting active */
+ } else { /* Counting requested */
+ use_ptr = &cpump->counter;
+ if (cpump->sampler)
+ rc = -EBUSY; /* ... and sampling active */
+ }
+ if (rc)
+ goto unlock;
+
+ /* Allocate memory for counter page and counter extraction.
+ * Only the first counting event has to allocate a page.
+ */
+ if (cpump->page)
+ goto unlock;
+
+ rc = -ENOMEM;
+ cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+ if (!cpump->page)
+ goto unlock;
+ cpump->save = kvmalloc_array(paicrypt_cnt + 1,
+ sizeof(struct pai_userdata), GFP_KERNEL);
+ if (!cpump->save) {
+ free_page((unsigned long)cpump->page);
+ cpump->page = NULL;
+ goto unlock;
+ }
+ rc = 0;
+
+unlock:
+ /* If rc is non-zero, do not increment counter/sampler. */
+ if (!rc)
+ *use_ptr += 1;
+ debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx sampler %d"
+ " counter %d page %#lx save %p rc %d\n", __func__,
+ a->sample_period, cpump->sampler, cpump->counter,
+ (unsigned long)cpump->page, cpump->save, rc);
+ mutex_unlock(&pai_reserve_mutex);
+ return rc;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paicrypt_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *a = &event->attr;
+ struct paicrypt_map *cpump;
+ int rc;
+
+ /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* PAI crypto event must be in valid range */
+ if (a->config < PAI_CRYPTO_BASE ||
+ a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
+ return -EINVAL;
+ /* Allow only CPU wide operation, no process context for now. */
+ if (event->hw.target || event->cpu == -1)
+ return -ENOENT;
+ /* Allow only CRYPTO_ALL for sampling. */
+ if (a->sample_period && a->config != PAI_CRYPTO_BASE)
+ return -EINVAL;
+
+ cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+ rc = paicrypt_busy(a, cpump);
+ if (rc)
+ return rc;
+
+ /* Event initialization sets last_tag to 0. When later on the events
+ * are deleted and re-added, do not reset the event count value to zero.
+ * Events are added, deleted and re-added when 2 or more events
+ * are active at the same time.
+ */
+ event->hw.last_tag = 0;
+ cpump->event = event;
+ event->destroy = paicrypt_event_destroy;
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paicrypt_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which contain the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+
+ static_branch_inc(&pai_key);
+ return 0;
+}
+
+static void paicrypt_read(struct perf_event *event)
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = paicrypt_getall(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = (prev <= new) ? new - prev
+ : (-1ULL - prev) + new + 1; /* overflow */
+ local64_add(delta, &event->count);
+}
+
+static void paicrypt_start(struct perf_event *event, int flags)
+{
+ u64 sum;
+
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->count, 0);
+ local64_set(&event->hw.prev_count, sum);
+ }
+}
+
+static int paicrypt_add(struct perf_event *event, int flags)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ unsigned long ccd;
+
+ if (cpump->users++ == 0) {
+ ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
+ WRITE_ONCE(S390_lowcore.ccd, ccd);
+ __ctl_set_bit(0, 50);
+ }
+ cpump->event = event;
+ if (flags & PERF_EF_START && !event->attr.sample_period) {
+ /* Only counting needs initial counter value */
+ paicrypt_start(event, PERF_EF_RELOAD);
+ }
+ event->hw.state = 0;
+ if (event->attr.sample_period)
+ perf_sched_cb_inc(event->pmu);
+ return 0;
+}
+
+static void paicrypt_stop(struct perf_event *event, int flags)
+{
+ paicrypt_read(event);
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paicrypt_del(struct perf_event *event, int flags)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+
+ if (event->attr.sample_period)
+ perf_sched_cb_dec(event->pmu);
+ if (!event->attr.sample_period)
+ /* Only counting needs to read counter */
+ paicrypt_stop(event, PERF_EF_UPDATE);
+ if (cpump->users-- == 1) {
+ __ctl_clear_bit(0, 50);
+ WRITE_ONCE(S390_lowcore.ccd, 0);
+ }
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only positive counter entries of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paicrypt_copy(struct pai_userdata *userdata,
+ struct paicrypt_map *cpump,
+ bool exclude_user, bool exclude_kernel)
+{
+ int i, outidx = 0;
+
+ for (i = 1; i <= paicrypt_cnt; i++) {
+ u64 val = 0;
+
+ if (!exclude_kernel)
+ val += paicrypt_getctr(cpump, i, true);
+ if (!exclude_user)
+ val += paicrypt_getctr(cpump, i, false);
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(struct pai_userdata);
+}
+
+static int paicrypt_push_sample(void)
+{
+ struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct perf_event *event = cpump->event;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ size_t rawsize;
+ int overflow;
+
+ if (!cpump->event) /* No event active */
+ return 0;
+ rawsize = paicrypt_copy(cpump->save, cpump,
+ cpump->event->attr.exclude_user,
+ cpump->event->attr.exclude_kernel);
+ if (!rawsize) /* No incremented counters */
+ return 0;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU) {
+ data.cpu_entry.cpu = smp_processor_id();
+ data.cpu_entry.reserved = 0;
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ raw.size = raw.frag.size;
+ data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Clear lowcore page after read */
+ memset(cpump->page, 0, PAGE_SIZE);
+ return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event CRYPTO_ALL is allowed.
+ */
+static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, clear values.
+ */
+ if (!sched_in)
+ paicrypt_push_sample();
+}
+
+/* Attribute definitions for paicrypt interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instructions returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1000 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paicrypt_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paicrypt_events_group = {
+ .name = "events",
+ .attrs = NULL /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paicrypt_format_group = {
+ .name = "format",
+ .attrs = paicrypt_format_attr,
+};
+
+static const struct attribute_group *paicrypt_attr_groups[] = {
+ &paicrypt_events_group,
+ &paicrypt_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paicrypt = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = paicrypt_event_init,
+ .add = paicrypt_add,
+ .del = paicrypt_del,
+ .start = paicrypt_start,
+ .stop = paicrypt_stop,
+ .read = paicrypt_read,
+ .sched_task = paicrypt_sched_task,
+ .attr_groups = paicrypt_attr_groups
+};
+
+/* List of symbolic PAI counter names. */
+static const char * const paicrypt_ctrnames[] = {
+ [0] = "CRYPTO_ALL",
+ [1] = "KM_DEA",
+ [2] = "KM_TDEA_128",
+ [3] = "KM_TDEA_192",
+ [4] = "KM_ENCRYPTED_DEA",
+ [5] = "KM_ENCRYPTED_TDEA_128",
+ [6] = "KM_ENCRYPTED_TDEA_192",
+ [7] = "KM_AES_128",
+ [8] = "KM_AES_192",
+ [9] = "KM_AES_256",
+ [10] = "KM_ENCRYPTED_AES_128",
+ [11] = "KM_ENCRYPTED_AES_192",
+ [12] = "KM_ENCRYPTED_AES_256",
+ [13] = "KM_XTS_AES_128",
+ [14] = "KM_XTS_AES_256",
+ [15] = "KM_XTS_ENCRYPTED_AES_128",
+ [16] = "KM_XTS_ENCRYPTED_AES_256",
+ [17] = "KMC_DEA",
+ [18] = "KMC_TDEA_128",
+ [19] = "KMC_TDEA_192",
+ [20] = "KMC_ENCRYPTED_DEA",
+ [21] = "KMC_ENCRYPTED_TDEA_128",
+ [22] = "KMC_ENCRYPTED_TDEA_192",
+ [23] = "KMC_AES_128",
+ [24] = "KMC_AES_192",
+ [25] = "KMC_AES_256",
+ [26] = "KMC_ENCRYPTED_AES_128",
+ [27] = "KMC_ENCRYPTED_AES_192",
+ [28] = "KMC_ENCRYPTED_AES_256",
+ [29] = "KMC_PRNG",
+ [30] = "KMA_GCM_AES_128",
+ [31] = "KMA_GCM_AES_192",
+ [32] = "KMA_GCM_AES_256",
+ [33] = "KMA_GCM_ENCRYPTED_AES_128",
+ [34] = "KMA_GCM_ENCRYPTED_AES_192",
+ [35] = "KMA_GCM_ENCRYPTED_AES_256",
+ [36] = "KMF_DEA",
+ [37] = "KMF_TDEA_128",
+ [38] = "KMF_TDEA_192",
+ [39] = "KMF_ENCRYPTED_DEA",
+ [40] = "KMF_ENCRYPTED_TDEA_128",
+ [41] = "KMF_ENCRYPTED_TDEA_192",
+ [42] = "KMF_AES_128",
+ [43] = "KMF_AES_192",
+ [44] = "KMF_AES_256",
+ [45] = "KMF_ENCRYPTED_AES_128",
+ [46] = "KMF_ENCRYPTED_AES_192",
+ [47] = "KMF_ENCRYPTED_AES_256",
+ [48] = "KMCTR_DEA",
+ [49] = "KMCTR_TDEA_128",
+ [50] = "KMCTR_TDEA_192",
+ [51] = "KMCTR_ENCRYPTED_DEA",
+ [52] = "KMCTR_ENCRYPTED_TDEA_128",
+ [53] = "KMCTR_ENCRYPTED_TDEA_192",
+ [54] = "KMCTR_AES_128",
+ [55] = "KMCTR_AES_192",
+ [56] = "KMCTR_AES_256",
+ [57] = "KMCTR_ENCRYPTED_AES_128",
+ [58] = "KMCTR_ENCRYPTED_AES_192",
+ [59] = "KMCTR_ENCRYPTED_AES_256",
+ [60] = "KMO_DEA",
+ [61] = "KMO_TDEA_128",
+ [62] = "KMO_TDEA_192",
+ [63] = "KMO_ENCRYPTED_DEA",
+ [64] = "KMO_ENCRYPTED_TDEA_128",
+ [65] = "KMO_ENCRYPTED_TDEA_192",
+ [66] = "KMO_AES_128",
+ [67] = "KMO_AES_192",
+ [68] = "KMO_AES_256",
+ [69] = "KMO_ENCRYPTED_AES_128",
+ [70] = "KMO_ENCRYPTED_AES_192",
+ [71] = "KMO_ENCRYPTED_AES_256",
+ [72] = "KIMD_SHA_1",
+ [73] = "KIMD_SHA_256",
+ [74] = "KIMD_SHA_512",
+ [75] = "KIMD_SHA3_224",
+ [76] = "KIMD_SHA3_256",
+ [77] = "KIMD_SHA3_384",
+ [78] = "KIMD_SHA3_512",
+ [79] = "KIMD_SHAKE_128",
+ [80] = "KIMD_SHAKE_256",
+ [81] = "KIMD_GHASH",
+ [82] = "KLMD_SHA_1",
+ [83] = "KLMD_SHA_256",
+ [84] = "KLMD_SHA_512",
+ [85] = "KLMD_SHA3_224",
+ [86] = "KLMD_SHA3_256",
+ [87] = "KLMD_SHA3_384",
+ [88] = "KLMD_SHA3_512",
+ [89] = "KLMD_SHAKE_128",
+ [90] = "KLMD_SHAKE_256",
+ [91] = "KMAC_DEA",
+ [92] = "KMAC_TDEA_128",
+ [93] = "KMAC_TDEA_192",
+ [94] = "KMAC_ENCRYPTED_DEA",
+ [95] = "KMAC_ENCRYPTED_TDEA_128",
+ [96] = "KMAC_ENCRYPTED_TDEA_192",
+ [97] = "KMAC_AES_128",
+ [98] = "KMAC_AES_192",
+ [99] = "KMAC_AES_256",
+ [100] = "KMAC_ENCRYPTED_AES_128",
+ [101] = "KMAC_ENCRYPTED_AES_192",
+ [102] = "KMAC_ENCRYPTED_AES_256",
+ [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA",
+ [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128",
+ [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192",
+ [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA",
+ [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128",
+ [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192",
+ [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128",
+ [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192",
+ [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256",
+ [112] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128",
+ [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192",
+ [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A",
+ [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128",
+ [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256",
+ [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128",
+ [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256",
+ [119] = "PCC_SCALAR_MULTIPLY_P256",
+ [120] = "PCC_SCALAR_MULTIPLY_P384",
+ [121] = "PCC_SCALAR_MULTIPLY_P521",
+ [122] = "PCC_SCALAR_MULTIPLY_ED25519",
+ [123] = "PCC_SCALAR_MULTIPLY_ED448",
+ [124] = "PCC_SCALAR_MULTIPLY_X25519",
+ [125] = "PCC_SCALAR_MULTIPLY_X448",
+ [126] = "PRNO_SHA_512_DRNG",
+ [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO",
+ [128] = "PRNO_TRNG",
+ [129] = "KDSA_ECDSA_VERIFY_P256",
+ [130] = "KDSA_ECDSA_VERIFY_P384",
+ [131] = "KDSA_ECDSA_VERIFY_P521",
+ [132] = "KDSA_ECDSA_SIGN_P256",
+ [133] = "KDSA_ECDSA_SIGN_P384",
+ [134] = "KDSA_ECDSA_SIGN_P521",
+ [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256",
+ [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384",
+ [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521",
+ [138] = "KDSA_EDDSA_VERIFY_ED25519",
+ [139] = "KDSA_EDDSA_VERIFY_ED448",
+ [140] = "KDSA_EDDSA_SIGN_ED25519",
+ [141] = "KDSA_EDDSA_SIGN_ED448",
+ [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519",
+ [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448",
+ [144] = "PCKMO_ENCRYPT_DEA_KEY",
+ [145] = "PCKMO_ENCRYPT_TDEA_128_KEY",
+ [146] = "PCKMO_ENCRYPT_TDEA_192_KEY",
+ [147] = "PCKMO_ENCRYPT_AES_128_KEY",
+ [148] = "PCKMO_ENCRYPT_AES_192_KEY",
+ [149] = "PCKMO_ENCRYPT_AES_256_KEY",
+ [150] = "PCKMO_ENCRYPT_ECC_P256_KEY",
+ [151] = "PCKMO_ENCRYPT_ECC_P384_KEY",
+ [152] = "PCKMO_ENCRYPT_ECC_P521_KEY",
+ [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY",
+ [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
+ [155] = "IBM_RESERVED_155",
+ [156] = "IBM_RESERVED_156",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ struct device_attribute *dap;
+
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = PAI_CRYPTO_BASE + num;
+ pa->attr.attr.name = paicrypt_ctrnames[num];
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ attrs[num] = &pa->attr.attr;
+ return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+ struct attribute **attrs;
+ int ret, i;
+
+ attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
+ GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+ ret = attr_event_init_one(attrs, i);
+ if (ret) {
+ attr_event_free(attrs, i - 1);
+ return ret;
+ }
+ }
+ attrs[i] = NULL;
+ paicrypt_events_group.attrs = attrs;
+ return 0;
+}
+
+static int __init paicrypt_init(void)
+{
+ struct qpaci_info_block ib;
+ int rc;
+
+ if (!test_facility(196))
+ return 0;
+
+ qpaci(&ib);
+ paicrypt_cnt = ib.num_cc;
+ if (paicrypt_cnt == 0)
+ return 0;
+ if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
+ paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+
+ rc = attr_event_init(); /* Export known PAI crypto events */
+ if (rc) {
+ pr_err("Creation of PMU pai_crypto /sysfs failed\n");
+ return rc;
+ }
+
+ /* Setup s390dbf facility */
+ cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+ if (!cfm_dbg) {
+ pr_err("Registration of s390dbf pai_crypto failed\n");
+ return -ENOMEM;
+ }
+ debug_register_view(cfm_dbg, &debug_sprintf_view);
+
+ rc = perf_pmu_register(&paicrypt, "pai_crypto", -1);
+ if (rc) {
+ pr_err("Registering the pai_crypto PMU failed with rc=%i\n",
+ rc);
+ debug_unregister_view(cfm_dbg, &debug_sprintf_view);
+ debug_unregister(cfm_dbg);
+ return rc;
+ }
+ return 0;
+}
+
+device_initcall(paicrypt_init);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 000000000000..74b53c531e0c
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_ext"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+
+#include <asm/cpu_mcf.h>
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
+#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
+
+enum paiext_mode {
+ PAI_MODE_NONE,
+ PAI_MODE_SAMPLING,
+ PAI_MODE_COUNTER,
+};
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb { /* PAI extension 1 control block */
+ u64 header; /* Not used */
+ u64 reserved1;
+ u64 acc; /* Addr to analytics counter control block */
+ u8 reserved2[488];
+} __packed;
+
+struct paiext_map {
+ unsigned long *area; /* Area for CPU to store counters */
+ struct pai_userdata *save; /* Area to store non-zero counters */
+ enum paiext_mode mode; /* Type of event */
+ unsigned int active_events; /* # of PAI Extension users */
+ unsigned int refcnt;
+ struct perf_event *event; /* Perf event for sampling */
+ struct paiext_cb *paiext_cb; /* PAI extension control block area */
+};
+
+struct paiext_mapptr {
+ struct paiext_map *mapptr;
+};
+
+static struct paiext_root { /* Anchor to per CPU data */
+ int refcnt; /* Overall active events */
+ struct paiext_mapptr __percpu *mapptr;
+} paiext_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paiext_root_free(void)
+{
+ if (!--paiext_root.refcnt) {
+ free_percpu(paiext_root.mapptr);
+ paiext_root.mapptr = NULL;
+ }
+}
+
+/* On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int paiext_root_alloc(void)
+{
+ if (++paiext_root.refcnt == 1) {
+ /* The memory is already zeroed. */
+ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+ if (!paiext_root.mapptr) {
+ /* Returing without refcnt adjustment is ok. The
+ * error code is handled by paiext_alloc() which
+ * decrements refcnt when an event can not be
+ * created.
+ */
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+/* Protects against concurrent increment of sampler and counter member
+ * increments at the same time and prohibits concurrent execution of
+ * counting and sampling events.
+ * Ensures that analytics counter block is deallocated only when the
+ * sampling and counting on that cpu is zero.
+ * For details see paiext_alloc().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+ kfree(mp->mapptr->area);
+ kfree(mp->mapptr->paiext_cb);
+ kvfree(mp->mapptr->save);
+ kfree(mp->mapptr);
+ mp->mapptr = NULL;
+}
+
+/* Release the PMU if event is the last perf event */
+static void paiext_event_destroy(struct perf_event *event)
+{
+ struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ struct paiext_map *cpump = mp->mapptr;
+
+ mutex_lock(&paiext_reserve_mutex);
+ cpump->event = NULL;
+ if (!--cpump->refcnt) /* Last reference gone */
+ paiext_free(mp);
+ paiext_root_free();
+ mutex_unlock(&paiext_reserve_mutex);
+ debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
+ event->cpu, mp->mapptr);
+
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization functions fails, no other call back will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+{
+ struct paiext_mapptr *mp;
+ struct paiext_map *cpump;
+ int rc;
+
+ mutex_lock(&paiext_reserve_mutex);
+
+ rc = paiext_root_alloc();
+ if (rc)
+ goto unlock;
+
+ mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paiext_map allocated? */
+ rc = -ENOMEM;
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump)
+ goto unlock;
+
+ /* Allocate memory for counter area and counter extraction.
+ * These are
+ * - a 512 byte block and requires 512 byte boundary alignment.
+ * - a 1KB byte block and requires 1KB boundary alignment.
+ * Only the first counting event has to allocate the area.
+ *
+ * Note: This works with commit 59bb47985c1d by default.
+ * Backporting this to kernels without this commit might
+ * need adjustment.
+ */
+ mp->mapptr = cpump;
+ cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+ cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+ cpump->save = kvmalloc_array(paiext_cnt + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+ paiext_free(mp);
+ goto unlock;
+ }
+ cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
+ : PAI_MODE_COUNTER;
+ } else {
+ /* Multiple invocation, check whats active.
+ * Supported are multiple counter events or only one sampling
+ * event concurrently at any one time.
+ */
+ if (cpump->mode == PAI_MODE_SAMPLING ||
+ (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) {
+ rc = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ rc = 0;
+ cpump->event = event;
+ ++cpump->refcnt;
+
+unlock:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+ * the event in not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ paiext_root_free();
+ }
+ mutex_unlock(&paiext_reserve_mutex);
+ /* If rc is non-zero, no increment of counter/sampler was done. */
+ return rc;
+}
+
+/* The PAI extension 1 control block supports up to 128 entries. Return
+ * the index within PAIE1_CB given the event number. Also validate event
+ * number.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+
+ if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
+ /* Offset NNPA in paiext_cb */
+ event->hw.config_base = offsetof(struct paiext_cb, acc);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *a = &event->attr;
+ int rc;
+
+ /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* PAI extension event must be valid and in supported range */
+ rc = paiext_event_valid(event);
+ if (rc)
+ return rc;
+ /* Allow only CPU wide operation, no process context for now. */
+ if (event->hw.target || event->cpu == -1)
+ return -ENOENT;
+ /* Allow only event NNPA_ALL for sampling. */
+ if (a->sample_period && a->config != PAI_NNPA_BASE)
+ return -EINVAL;
+ /* Prohibit exclude_user event selection */
+ if (a->exclude_user)
+ return -EINVAL;
+
+ rc = paiext_alloc(a, event);
+ if (rc)
+ return rc;
+ event->hw.last_tag = 0;
+ event->destroy = paiext_event_destroy;
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+ /* Register for paicrypt_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which are the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+
+ return 0;
+}
+
+static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+{
+ return cpump->area[nr];
+}
+
+/* Read the counter values. Return value from location in buffer. For event
+ * NNPA_ALL sum up all events.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ u64 sum = 0;
+ int i;
+
+ if (event->attr.config != PAI_NNPA_BASE)
+ return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
+
+ for (i = 1; i <= paiext_cnt; i++)
+ sum += paiext_getctr(cpump, i);
+
+ return sum;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+ return paiext_getdata(event);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = paiext_getall(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = new - prev;
+ local64_add(delta, &event->count);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+ u64 sum;
+
+ if (event->hw.last_tag)
+ return;
+ event->hw.last_tag = 1;
+ sum = paiext_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ local64_set(&event->count, 0);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ if (++cpump->active_events == 1) {
+ S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+ pcb->acc = virt_to_phys(cpump->area) | 0x1;
+ /* Enable CPU instruction lookup for PAIE1 control block */
+ __ctl_set_bit(0, 49);
+ debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+ __func__, S390_lowcore.aicd, pcb->acc);
+ }
+ if (flags & PERF_EF_START && !event->attr.sample_period) {
+ /* Only counting needs initial counter value */
+ paiext_start(event, PERF_EF_RELOAD);
+ }
+ event->hw.state = 0;
+ if (event->attr.sample_period) {
+ cpump->event = event;
+ perf_sched_cb_inc(event->pmu);
+ }
+ return 0;
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+ paiext_read(event);
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ if (event->attr.sample_period)
+ perf_sched_cb_dec(event->pmu);
+ if (!event->attr.sample_period) {
+ /* Only counting needs to read counter */
+ paiext_stop(event, PERF_EF_UPDATE);
+ }
+ if (--cpump->active_events == 0) {
+ /* Disable CPU instruction lookup for PAIE1 control block */
+ __ctl_clear_bit(0, 49);
+ pcb->acc = 0;
+ S390_lowcore.aicd = 0;
+ debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+ __func__, S390_lowcore.aicd, pcb->acc);
+ }
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only positive counter entries of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paiext_copy(struct paiext_map *cpump)
+{
+ struct pai_userdata *userdata = cpump->save;
+ int i, outidx = 0;
+
+ for (i = 1; i <= paiext_cnt; i++) {
+ u64 val = paiext_getctr(cpump, i);
+
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(*userdata);
+}
+
+/* Write sample when one or more counters values are nonzero.
+ *
+ * Note: The function paiext_sched_task() and paiext_push_sample() are not
+ * invoked after function paiext_del() has been called because of function
+ * perf_sched_cb_dec().
+ * The function paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paiext_sched_task() as call back
+ * to run at context switch time (see paiext_add()).
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed an
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int paiext_push_sample(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ size_t rawsize;
+ int overflow;
+
+ rawsize = paiext_copy(cpump);
+ if (!rawsize) /* No incremented counters */
+ return 0;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU)
+ data.cpu_entry.cpu = smp_processor_id();
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ raw.size = raw.frag.size;
+ data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+ /* Clear lowcore area after read */
+ memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
+ return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, clear values.
+ */
+ if (!sched_in)
+ paiext_push_sample();
+}
+
+/* Attribute definitions for pai extension1 interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instructions returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported, there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paiext_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+ .name = "format",
+ .attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+ &paiext_events_group,
+ &paiext_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = paiext_event_init,
+ .add = paiext_add,
+ .del = paiext_del,
+ .start = paiext_start,
+ .stop = paiext_stop,
+ .read = paiext_read,
+ .sched_task = paiext_sched_task,
+ .attr_groups = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+ [0] = "NNPA_ALL",
+ [1] = "NNPA_ADD",
+ [2] = "NNPA_SUB",
+ [3] = "NNPA_MUL",
+ [4] = "NNPA_DIV",
+ [5] = "NNPA_MIN",
+ [6] = "NNPA_MAX",
+ [7] = "NNPA_LOG",
+ [8] = "NNPA_EXP",
+ [9] = "NNPA_IBM_RESERVED_9",
+ [10] = "NNPA_RELU",
+ [11] = "NNPA_TANH",
+ [12] = "NNPA_SIGMOID",
+ [13] = "NNPA_SOFTMAX",
+ [14] = "NNPA_BATCHNORM",
+ [15] = "NNPA_MAXPOOL2D",
+ [16] = "NNPA_AVGPOOL2D",
+ [17] = "NNPA_LSTMACT",
+ [18] = "NNPA_GRUACT",
+ [19] = "NNPA_CONVOLUTION",
+ [20] = "NNPA_MATMUL_OP",
+ [21] = "NNPA_MATMUL_OP_BCAST23",
+ [22] = "NNPA_SMALLBATCH",
+ [23] = "NNPA_LARGEDIM",
+ [24] = "NNPA_SMALLTENSOR",
+ [25] = "NNPA_1MFRAME",
+ [26] = "NNPA_2GFRAME",
+ [27] = "NNPA_ACCESSEXCEPT",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+ struct device_attribute *dap;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = PAI_NNPA_BASE + num;
+ pa->attr.attr.name = paiext_ctrnames[num];
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ attrs[num] = &pa->attr.attr;
+ return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+ struct attribute **attrs;
+ int ret, i;
+
+ attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
+ GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+ for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
+ ret = attr_event_init_one(attrs, i);
+ if (ret) {
+ attr_event_free(attrs, i - 1);
+ return ret;
+ }
+ }
+ attrs[i] = NULL;
+ paiext_events_group.attrs = attrs;
+ return 0;
+}
+
+static int __init paiext_init(void)
+{
+ struct qpaci_info_block ib;
+ int rc = -ENOMEM;
+
+ if (!test_facility(197))
+ return 0;
+
+ qpaci(&ib);
+ paiext_cnt = ib.num_nnpa;
+ if (paiext_cnt >= PAI_NNPA_MAXCTR)
+ paiext_cnt = PAI_NNPA_MAXCTR;
+ if (!paiext_cnt)
+ return 0;
+
+ rc = attr_event_init();
+ if (rc) {
+ pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+ return rc;
+ }
+
+ /* Setup s390dbf facility */
+ paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+ if (!paiext_dbg) {
+ pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+ rc = -ENOMEM;
+ goto out_init;
+ }
+ debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+ rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+ if (rc) {
+ pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
+ "rc=%i\n", rc);
+ goto out_pmu;
+ }
+
+ return 0;
+
+out_pmu:
+ debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+ debug_unregister(paiext_dbg);
+out_init:
+ attr_event_free(paiext_events_group.attrs,
+ ARRAY_SIZE(paiext_ctrnames) + 1);
+ return rc;
+}
+
+device_initcall(paiext_init);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e8858b2de24b..42af4b3aa02b 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,12 +91,26 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
memcpy(dst, src, arch_task_struct_size);
dst->thread.fpu.regs = dst->thread.fpu.fprs;
+
+ /*
+ * Don't transfer over the runtime instrumentation or the guarded
+ * storage control block pointers. These fields are cleared here instead
+ * of in copy_thread() to avoid premature freeing of associated memory
+ * on fork() failure. Wait to clear the RI flag because ->stack still
+ * refers to the source thread.
+ */
+ dst->thread.ri_cb = NULL;
+ dst->thread.gs_cb = NULL;
+ dst->thread.gs_bc_cb = NULL;
+
return 0;
}
-int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
+ unsigned long clone_flags = args->flags;
+ unsigned long new_stackp = args->stack;
+ unsigned long tls = args->tls;
struct fake_frame
{
struct stack_frame sf;
@@ -130,16 +144,15 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame->sf.gprs[9] = (unsigned long)frame;
/* Store access registers to kernel stack of new process. */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr =
(unsigned long)__ret_from_fork;
- frame->childregs.gprs[9] = new_stackp; /* function */
- frame->childregs.gprs[10] = arg;
- frame->childregs.gprs[11] = (unsigned long)do_exit;
+ frame->childregs.gprs[9] = (unsigned long)args->fn;
+ frame->childregs.gprs[10] = (unsigned long)args->fn_arg;
frame->childregs.orig_gpr2 = -1;
frame->childregs.last_break = 1;
return 0;
@@ -149,13 +162,11 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame->childregs.flags = 0;
if (new_stackp)
frame->childregs.gprs[15] = new_stackp;
-
- /* Don't copy runtime instrumentation info */
- p->thread.ri_cb = NULL;
+ /*
+ * Clear the runtime instrumentation flag after the above childregs
+ * copy. The CB pointer was already cleared in arch_dup_task_struct().
+ */
frame->childregs.psw.mask &= ~PSW_MASK_RI;
- /* Don't copy guarded storage control block */
- p->thread.gs_cb = NULL;
- p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
@@ -213,13 +224,13 @@ unsigned long __get_wchan(struct task_struct *p)
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= prandom_u32_max(PAGE_SIZE);
return sp & ~0xf;
}
static inline unsigned long brk_rnd(void)
{
- return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index d9d4a806979e..a194611ba88c 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -8,7 +8,6 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/stop_machine.h>
-#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/random.h>
@@ -96,15 +95,6 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, current);
}
-/*
- * cpu_have_feature - Test CPU features on module initialization
- */
-int cpu_have_feature(unsigned int num)
-{
- return elf_hwcap & (1UL << num);
-}
-EXPORT_SYMBOL(cpu_have_feature);
-
static void show_facilities(struct seq_file *m)
{
unsigned int bit;
@@ -172,8 +162,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
static int __init setup_hwcaps(void)
{
/* instructions named N3, "backported" to esa-mode */
- if (test_facility(0))
- elf_hwcap |= HWCAP_ESAN3;
+ elf_hwcap |= HWCAP_ESAN3;
/* z/Architecture mode active */
elf_hwcap |= HWCAP_ZARCH;
@@ -191,8 +180,7 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_LDISP;
/* extended-immediate */
- if (test_facility(21))
- elf_hwcap |= HWCAP_EIMM;
+ elf_hwcap |= HWCAP_EIMM;
/* extended-translation facility 3 enhancement */
if (test_facility(22) && test_facility(30))
@@ -262,21 +250,7 @@ static int __init setup_elf_platform(void)
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
- case 0x2064:
- case 0x2066:
- default: /* Use "z900" as default for 64 bit kernels. */
- strcpy(elf_platform, "z900");
- break;
- case 0x2084:
- case 0x2086:
- strcpy(elf_platform, "z990");
- break;
- case 0x2094:
- case 0x2096:
- strcpy(elf_platform, "z9-109");
- break;
- case 0x2097:
- case 0x2098:
+ default: /* Use "z10" as default. */
strcpy(elf_platform, "z10");
break;
case 0x2817:
@@ -299,6 +273,10 @@ static int __init setup_elf_platform(void)
case 0x8562:
strcpy(elf_platform, "z15");
break;
+ case 0x3931:
+ case 0x3932:
+ strcpy(elf_platform, "z16");
+ break;
}
return 0;
}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 0ea3d02b378d..53e0209229f8 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -21,7 +21,6 @@
#include <linux/signal.h>
#include <linux/elf.h>
#include <linux/regset.h>
-#include <linux/tracehook.h>
#include <linux/seccomp.h>
#include <linux/compat.h>
#include <trace/syscall.h>
@@ -147,38 +146,36 @@ void ptrace_disable(struct task_struct *task)
static inline unsigned long __peek_user_per(struct task_struct *child,
addr_t addr)
{
- struct per_struct_kernel *dummy = NULL;
-
- if (addr == (addr_t) &dummy->cr9)
+ if (addr == offsetof(struct per_struct_kernel, cr9))
/* Control bits of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
PER_EVENT_IFETCH : child->thread.per_user.control;
- else if (addr == (addr_t) &dummy->cr10)
+ else if (addr == offsetof(struct per_struct_kernel, cr10))
/* Start address of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
0 : child->thread.per_user.start;
- else if (addr == (addr_t) &dummy->cr11)
+ else if (addr == offsetof(struct per_struct_kernel, cr11))
/* End address of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
-1UL : child->thread.per_user.end;
- else if (addr == (addr_t) &dummy->bits)
+ else if (addr == offsetof(struct per_struct_kernel, bits))
/* Single-step bit. */
return test_thread_flag(TIF_SINGLE_STEP) ?
(1UL << (BITS_PER_LONG - 1)) : 0;
- else if (addr == (addr_t) &dummy->starting_addr)
+ else if (addr == offsetof(struct per_struct_kernel, starting_addr))
/* Start address of the user specified per set. */
return child->thread.per_user.start;
- else if (addr == (addr_t) &dummy->ending_addr)
+ else if (addr == offsetof(struct per_struct_kernel, ending_addr))
/* End address of the user specified per set. */
return child->thread.per_user.end;
- else if (addr == (addr_t) &dummy->perc_atmid)
+ else if (addr == offsetof(struct per_struct_kernel, perc_atmid))
/* PER code, ATMID and AI of the last PER trap */
return (unsigned long)
child->thread.per_event.cause << (BITS_PER_LONG - 16);
- else if (addr == (addr_t) &dummy->address)
+ else if (addr == offsetof(struct per_struct_kernel, address))
/* Address of the last PER trap */
return child->thread.per_event.address;
- else if (addr == (addr_t) &dummy->access_id)
+ else if (addr == offsetof(struct per_struct_kernel, access_id))
/* Access id of the last PER trap */
return (unsigned long)
child->thread.per_event.paid << (BITS_PER_LONG - 8);
@@ -196,61 +193,60 @@ static inline unsigned long __peek_user_per(struct task_struct *child,
*/
static unsigned long __peek_user(struct task_struct *child, addr_t addr)
{
- struct user *dummy = NULL;
addr_t offset, tmp;
- if (addr < (addr_t) &dummy->regs.acrs) {
+ if (addr < offsetof(struct user, regs.acrs)) {
/*
* psw and gprs are stored on the stack
*/
tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
- if (addr == (addr_t) &dummy->regs.psw.mask) {
+ if (addr == offsetof(struct user, regs.psw.mask)) {
/* Return a clean psw mask. */
tmp &= PSW_MASK_USER | PSW_MASK_RI;
tmp |= PSW_USER_BITS;
}
- } else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
+ } else if (addr < offsetof(struct user, regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
- offset = addr - (addr_t) &dummy->regs.acrs;
+ offset = addr - offsetof(struct user, regs.acrs);
/*
* Very special case: old & broken 64 bit gdb reading
* from acrs[15]. Result is a 64 bit value. Read the
* 32 bit acrs[15] value and shift it by 32. Sick...
*/
- if (addr == (addr_t) &dummy->regs.acrs[15])
+ if (addr == offsetof(struct user, regs.acrs[15]))
tmp = ((unsigned long) child->thread.acrs[15]) << 32;
else
tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
- } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ } else if (addr == offsetof(struct user, regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
- } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ } else if (addr < offsetof(struct user, regs.fp_regs)) {
/*
* prevent reads of padding hole between
* orig_gpr2 and fp_regs on s390.
*/
tmp = 0;
- } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) {
/*
* floating point control reg. is in the thread structure
*/
tmp = child->thread.fpu.fpc;
tmp <<= BITS_PER_LONG - 32;
- } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.fpu
* or the child->thread.fpu.vxrs array
*/
- offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+ offset = addr - offsetof(struct user, regs.fp_regs.fprs);
if (MACHINE_HAS_VX)
tmp = *(addr_t *)
((addr_t) child->thread.fpu.vxrs + 2*offset);
@@ -258,11 +254,11 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
tmp = *(addr_t *)
((addr_t) child->thread.fpu.fprs + offset);
- } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
*/
- addr -= (addr_t) &dummy->regs.per_info;
+ addr -= offsetof(struct user, regs.per_info);
tmp = __peek_user_per(child, addr);
} else
@@ -281,8 +277,8 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
* an alignment of 4. Programmers from hell...
*/
mask = __ADDR_MASK;
- if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
- addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ if (addr >= offsetof(struct user, regs.acrs) &&
+ addr < offsetof(struct user, regs.orig_gpr2))
mask = 3;
if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
@@ -294,8 +290,6 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
static inline void __poke_user_per(struct task_struct *child,
addr_t addr, addr_t data)
{
- struct per_struct_kernel *dummy = NULL;
-
/*
* There are only three fields in the per_info struct that the
* debugger user can write to.
@@ -308,14 +302,14 @@ static inline void __poke_user_per(struct task_struct *child,
* addresses are used only if single stepping is not in effect.
* Writes to any other field in per_info are ignored.
*/
- if (addr == (addr_t) &dummy->cr9)
+ if (addr == offsetof(struct per_struct_kernel, cr9))
/* PER event mask of the user specified per set. */
child->thread.per_user.control =
data & (PER_EVENT_MASK | PER_CONTROL_MASK);
- else if (addr == (addr_t) &dummy->starting_addr)
+ else if (addr == offsetof(struct per_struct_kernel, starting_addr))
/* Starting address of the user specified per set. */
child->thread.per_user.start = data;
- else if (addr == (addr_t) &dummy->ending_addr)
+ else if (addr == offsetof(struct per_struct_kernel, ending_addr))
/* Ending address of the user specified per set. */
child->thread.per_user.end = data;
}
@@ -328,16 +322,15 @@ static inline void __poke_user_per(struct task_struct *child,
*/
static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
{
- struct user *dummy = NULL;
addr_t offset;
- if (addr < (addr_t) &dummy->regs.acrs) {
+ if (addr < offsetof(struct user, regs.acrs)) {
struct pt_regs *regs = task_pt_regs(child);
/*
* psw and gprs are stored on the stack
*/
- if (addr == (addr_t) &dummy->regs.psw.mask) {
+ if (addr == offsetof(struct user, regs.psw.mask)) {
unsigned long mask = PSW_MASK_USER;
mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
@@ -359,36 +352,36 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
regs->int_code = 0x20000 | (data & 0xffff);
}
*(addr_t *)((addr_t) &regs->psw + addr) = data;
- } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
+ } else if (addr < offsetof(struct user, regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
- offset = addr - (addr_t) &dummy->regs.acrs;
+ offset = addr - offsetof(struct user, regs.acrs);
/*
* Very special case: old & broken 64 bit gdb writing
* to acrs[15] with a 64 bit value. Ignore the lower
* half of the value and write the upper 32 bit to
* acrs[15]. Sick...
*/
- if (addr == (addr_t) &dummy->regs.acrs[15])
+ if (addr == offsetof(struct user, regs.acrs[15]))
child->thread.acrs[15] = (unsigned int) (data >> 32);
else
*(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
- } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ } else if (addr == offsetof(struct user, regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
task_pt_regs(child)->orig_gpr2 = data;
- } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ } else if (addr < offsetof(struct user, regs.fp_regs)) {
/*
* prevent writes of padding hole between
* orig_gpr2 and fp_regs on s390.
*/
return 0;
- } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) {
/*
* floating point control reg. is in the thread structure
*/
@@ -397,12 +390,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
return -EINVAL;
child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
- } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.fpu
* or the child->thread.fpu.vxrs array
*/
- offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+ offset = addr - offsetof(struct user, regs.fp_regs.fprs);
if (MACHINE_HAS_VX)
*(addr_t *)((addr_t)
child->thread.fpu.vxrs + 2*offset) = data;
@@ -410,11 +403,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
*(addr_t *)((addr_t)
child->thread.fpu.fprs + offset) = data;
- } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
*/
- addr -= (addr_t) &dummy->regs.per_info;
+ addr -= offsetof(struct user, regs.per_info);
__poke_user_per(child, addr, data);
}
@@ -431,8 +424,8 @@ static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
* an alignment of 4. Programmers from hell indeed...
*/
mask = __ADDR_MASK;
- if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
- addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ if (addr >= offsetof(struct user, regs.acrs) &&
+ addr < offsetof(struct user, regs.orig_gpr2))
mask = 3;
if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
return -EIO;
@@ -540,37 +533,35 @@ long arch_ptrace(struct task_struct *child, long request,
static inline __u32 __peek_user_per_compat(struct task_struct *child,
addr_t addr)
{
- struct compat_per_struct_kernel *dummy32 = NULL;
-
- if (addr == (addr_t) &dummy32->cr9)
+ if (addr == offsetof(struct compat_per_struct_kernel, cr9))
/* Control bits of the active per set. */
return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
PER_EVENT_IFETCH : child->thread.per_user.control;
- else if (addr == (addr_t) &dummy32->cr10)
+ else if (addr == offsetof(struct compat_per_struct_kernel, cr10))
/* Start address of the active per set. */
return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
0 : child->thread.per_user.start;
- else if (addr == (addr_t) &dummy32->cr11)
+ else if (addr == offsetof(struct compat_per_struct_kernel, cr11))
/* End address of the active per set. */
return test_thread_flag(TIF_SINGLE_STEP) ?
PSW32_ADDR_INSN : child->thread.per_user.end;
- else if (addr == (addr_t) &dummy32->bits)
+ else if (addr == offsetof(struct compat_per_struct_kernel, bits))
/* Single-step bit. */
return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
0x80000000 : 0;
- else if (addr == (addr_t) &dummy32->starting_addr)
+ else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
/* Start address of the user specified per set. */
return (__u32) child->thread.per_user.start;
- else if (addr == (addr_t) &dummy32->ending_addr)
+ else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
/* End address of the user specified per set. */
return (__u32) child->thread.per_user.end;
- else if (addr == (addr_t) &dummy32->perc_atmid)
+ else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid))
/* PER code, ATMID and AI of the last PER trap */
return (__u32) child->thread.per_event.cause << 16;
- else if (addr == (addr_t) &dummy32->address)
+ else if (addr == offsetof(struct compat_per_struct_kernel, address))
/* Address of the last PER trap */
return (__u32) child->thread.per_event.address;
- else if (addr == (addr_t) &dummy32->access_id)
+ else if (addr == offsetof(struct compat_per_struct_kernel, access_id))
/* Access id of the last PER trap */
return (__u32) child->thread.per_event.paid << 24;
return 0;
@@ -581,21 +572,20 @@ static inline __u32 __peek_user_per_compat(struct task_struct *child,
*/
static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
{
- struct compat_user *dummy32 = NULL;
addr_t offset;
__u32 tmp;
- if (addr < (addr_t) &dummy32->regs.acrs) {
+ if (addr < offsetof(struct compat_user, regs.acrs)) {
struct pt_regs *regs = task_pt_regs(child);
/*
* psw and gprs are stored on the stack
*/
- if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ if (addr == offsetof(struct compat_user, regs.psw.mask)) {
/* Fake a 31 bit psw mask. */
tmp = (__u32)(regs->psw.mask >> 32);
tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
tmp |= PSW32_USER_BITS;
- } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+ } else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
/* Fake a 31 bit psw address. */
tmp = (__u32) regs->psw.addr |
(__u32)(regs->psw.mask & PSW_MASK_BA);
@@ -603,38 +593,38 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
/* gpr 0-15 */
tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
}
- } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+ } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
- offset = addr - (addr_t) &dummy32->regs.acrs;
+ offset = addr - offsetof(struct compat_user, regs.acrs);
tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
- } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+ } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
- } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ } else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
/*
* prevent reads of padding hole between
* orig_gpr2 and fp_regs on s390.
*/
tmp = 0;
- } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
/*
* floating point control reg. is in the thread structure
*/
tmp = child->thread.fpu.fpc;
- } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.fpu
* or the child->thread.fpu.vxrs array
*/
- offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+ offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
if (MACHINE_HAS_VX)
tmp = *(__u32 *)
((addr_t) child->thread.fpu.vxrs + 2*offset);
@@ -642,11 +632,11 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
tmp = *(__u32 *)
((addr_t) child->thread.fpu.fprs + offset);
- } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+ } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
*/
- addr -= (addr_t) &dummy32->regs.per_info;
+ addr -= offsetof(struct compat_user, regs.per_info);
tmp = __peek_user_per_compat(child, addr);
} else
@@ -673,16 +663,14 @@ static int peek_user_compat(struct task_struct *child,
static inline void __poke_user_per_compat(struct task_struct *child,
addr_t addr, __u32 data)
{
- struct compat_per_struct_kernel *dummy32 = NULL;
-
- if (addr == (addr_t) &dummy32->cr9)
+ if (addr == offsetof(struct compat_per_struct_kernel, cr9))
/* PER event mask of the user specified per set. */
child->thread.per_user.control =
data & (PER_EVENT_MASK | PER_CONTROL_MASK);
- else if (addr == (addr_t) &dummy32->starting_addr)
+ else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr))
/* Starting address of the user specified per set. */
child->thread.per_user.start = data;
- else if (addr == (addr_t) &dummy32->ending_addr)
+ else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr))
/* Ending address of the user specified per set. */
child->thread.per_user.end = data;
}
@@ -693,16 +681,15 @@ static inline void __poke_user_per_compat(struct task_struct *child,
static int __poke_user_compat(struct task_struct *child,
addr_t addr, addr_t data)
{
- struct compat_user *dummy32 = NULL;
__u32 tmp = (__u32) data;
addr_t offset;
- if (addr < (addr_t) &dummy32->regs.acrs) {
+ if (addr < offsetof(struct compat_user, regs.acrs)) {
struct pt_regs *regs = task_pt_regs(child);
/*
* psw, gprs, acrs and orig_gpr2 are stored on the stack
*/
- if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ if (addr == offsetof(struct compat_user, regs.psw.mask)) {
__u32 mask = PSW32_MASK_USER;
mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
@@ -716,7 +703,7 @@ static int __poke_user_compat(struct task_struct *child,
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
(regs->psw.mask & PSW_MASK_BA) |
(__u64)(tmp & mask) << 32;
- } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+ } else if (addr == offsetof(struct compat_user, regs.psw.addr)) {
/* Build a 64 bit psw address from 31 bit address. */
regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
/* Transfer 31 bit amode bit to psw mask. */
@@ -732,27 +719,27 @@ static int __poke_user_compat(struct task_struct *child,
/* gpr 0-15 */
*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
}
- } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+ } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) {
/*
* access registers are stored in the thread structure
*/
- offset = addr - (addr_t) &dummy32->regs.acrs;
+ offset = addr - offsetof(struct compat_user, regs.acrs);
*(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
- } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+ } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) {
/*
* orig_gpr2 is stored on the kernel stack
*/
*(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
- } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ } else if (addr < offsetof(struct compat_user, regs.fp_regs)) {
/*
* prevent writess of padding hole between
* orig_gpr2 and fp_regs on s390.
*/
return 0;
- } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) {
/*
* floating point control reg. is in the thread structure
*/
@@ -760,12 +747,12 @@ static int __poke_user_compat(struct task_struct *child,
return -EINVAL;
child->thread.fpu.fpc = data;
- } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
* floating point regs. are either in child->thread.fpu
* or the child->thread.fpu.vxrs array
*/
- offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+ offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
if (MACHINE_HAS_VX)
*(__u32 *)((addr_t)
child->thread.fpu.vxrs + 2*offset) = tmp;
@@ -773,11 +760,11 @@ static int __poke_user_compat(struct task_struct *child,
*(__u32 *)((addr_t)
child->thread.fpu.fprs + offset) = tmp;
- } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+ } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
*/
- addr -= (addr_t) &dummy32->regs.per_info;
+ addr -= offsetof(struct compat_user, regs.per_info);
__poke_user_per_compat(child, addr, data);
}
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index fe396673e8a6..a9a1a6f45375 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -2,8 +2,7 @@
/*
* Copyright IBM Corp. 2005
*
- * Author(s): Rolf Adelsberger,
- * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Author(s): Rolf Adelsberger
*
*/
@@ -15,6 +14,7 @@
* moves the new kernel to its destination...
* %r2 = pointer to first kimage_entry_t
* %r3 = start address - where to jump to after the job is done...
+ * %r4 = subcode
*
* %r5 will be used as temp. storage
* %r6 holds the destination address
@@ -57,7 +57,7 @@ ENTRY(relocate_kernel)
jo 0b
j .base
.done:
- sgr %r0,%r0 # clear register r0
+ lgr %r0,%r4 # subcode
cghi %r3,0
je .diag
la %r4,load_psw-.base(%r13) # load psw-address into the register
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 225ab2d0a4c6..ab19ddb09d65 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -58,7 +58,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
@@ -74,6 +74,7 @@
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
+#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"
@@ -395,6 +396,7 @@ void __init arch_call_rest_init(void)
{
unsigned long stack;
+ smp_reinit_ipl_cpu();
stack = stack_alloc();
if (!stack)
panic("Couldn't allocate kernel stack");
@@ -411,8 +413,9 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
unsigned long int_psw_mask = PSW_KERNEL_BITS;
+ struct lowcore *abs_lc, *lc;
unsigned long mcck_stack;
- struct lowcore *lc;
+ unsigned long flags;
if (IS_ENABLED(CONFIG_KASAN))
int_psw_mask |= PSW_MASK_DAT;
@@ -445,7 +448,7 @@ static void __init setup_lowcore_dat_off(void)
lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
lc->preempt_count = S390_lowcore.preempt_count;
- nmi_alloc_boot_cpu(lc);
+ nmi_alloc_mcesa_early(&lc->mcesad);
lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
lc->exit_timer = S390_lowcore.exit_timer;
lc->user_timer = S390_lowcore.user_timer;
@@ -474,46 +477,53 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc, flags);
+
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, THREAD_SIZE, THREAD_SIZE);
lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
- /* Setup absolute zero lowcore */
- mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
- mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
- mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
- mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
- mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
-
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
- lc->br_r1_trampoline = 0x07f1; /* br %r1 */
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
- set_prefix((u32)(unsigned long) lc);
+ set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
}
static void __init setup_lowcore_dat_on(void)
{
- struct lowcore *lc = lowcore_ptr[0];
+ struct lowcore *abs_lc;
+ unsigned long flags;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
__ctl_set_bit(0, 28);
- mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
- mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
- memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
- sizeof(S390_lowcore.cregs_save_area));
+ __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
+ if (abs_lowcore_map(0, lowcore_ptr[0], true))
+ panic("Couldn't setup absolute lowcore");
+ abs_lowcore_mapped = true;
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ abs_lc->program_new_psw = S390_lowcore.program_new_psw;
+ memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
+ sizeof(abs_lc->cregs_save_area));
+ put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
@@ -800,6 +810,8 @@ static void __init check_initrd(void)
static void __init reserve_kernel(void)
{
memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+ memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
+ memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
memblock_reserve(__amode31_base, __eamode31 - __samode31);
memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
memblock_reserve(__pa(_stext), _end - _stext);
@@ -873,6 +885,9 @@ static void __init setup_randomness(void)
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
+
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ static_branch_enable(&s390_arch_random_available);
}
/*
@@ -1014,10 +1029,10 @@ void __init setup_arch(char **cmdline_p)
reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
/*
- * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
* Therefore CPU and device initialization should be done afterwards.
*/
- smp_save_dump_cpus();
+ smp_save_dump_secondary_cpus();
#endif
setup_resources();
@@ -1036,12 +1051,15 @@ void __init setup_arch(char **cmdline_p)
* Create kernel page tables and switch to virtual addressing.
*/
paging_init();
-
+ memcpy_real_init();
/*
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
setup_lowcore_dat_on();
+#ifdef CONFIG_CRASH_DUMP
+ smp_save_dump_ipl_cpu();
+#endif
/* Setup default console */
conmode_default();
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 307f5d99514d..38258f817048 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -25,7 +25,6 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
-#include <linux/tracehook.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <asm/ucontext.h>
@@ -141,7 +140,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
_sigregs user_sregs;
- /* Alwys make any pending restarted system call return -EINTR */
+ /* Always make any pending restarted system call return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
@@ -453,7 +452,7 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
* stack-frames in one go after that.
*/
-void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
+void arch_do_signal_or_restart(struct pt_regs *regs)
{
struct ksignal ksig;
sigset_t *oldset = sigmask_to_save();
@@ -466,7 +465,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
current->thread.system_call =
test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
- if (has_signal && get_signal(&ksig)) {
+ if (get_signal(&ksig)) {
/* Whee! Actually deliver the signal. */
if (current->thread.system_call) {
regs->int_code = current->thread.system_call;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 78a8ea6fd582..0031325ce4bc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -5,7 +5,6 @@
* Copyright IBM Corp. 1999, 2012
* Author(s): Denis Joseph Barrow,
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*
* based on other smp stuff by
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
@@ -46,7 +45,7 @@
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
@@ -56,6 +55,7 @@
#include <asm/stacktrace.h>
#include <asm/topology.h>
#include <asm/vdso.h>
+#include <asm/maccess.h>
#include "entry.h"
enum {
@@ -208,16 +208,19 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
- lc->br_r1_trampoline = 0x07f1; /* br %r1 */
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
- if (nmi_alloc_per_cpu(lc))
+ if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
+ if (abs_lowcore_map(cpu, lc, true))
+ goto out_mcesa;
lowcore_ptr[cpu] = lc;
- pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
+out_mcesa:
+ nmi_free_mcesa(&lc->mcesad);
out:
stack_free(mcck_stack);
stack_free(async_stack);
@@ -239,7 +242,8 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[cpu] = NULL;
- nmi_free_per_cpu(lc);
+ abs_lowcore_unmap(cpu);
+ nmi_free_mcesa(&lc->mcesad);
stack_free(async_stack);
stack_free(mcck_stack);
free_pages(nodat_stack, THREAD_SIZE_ORDER);
@@ -317,9 +321,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
- struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned int source_cpu = stap();
+ struct lowcore *lc, *abs_lc;
+ unsigned int source_cpu;
+ unsigned long flags;
+ lc = lowcore_ptr[pcpu - pcpu_devices];
+ source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
@@ -328,10 +335,19 @@ static void pcpu_delegate(struct pcpu *pcpu,
/* Stop target cpu (if func returns this stops the current cpu). */
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
/* Restart func on the target cpu and stop the current cpu. */
- mem_assign_absolute(lc->restart_stack, stack);
- mem_assign_absolute(lc->restart_fn, (unsigned long) func);
- mem_assign_absolute(lc->restart_data, (unsigned long) data);
- mem_assign_absolute(lc->restart_source, source_cpu);
+ if (lc) {
+ lc->restart_stack = stack;
+ lc->restart_fn = (unsigned long)func;
+ lc->restart_data = (unsigned long)data;
+ lc->restart_source = source_cpu;
+ } else {
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = stack;
+ abs_lc->restart_fn = (unsigned long)func;
+ abs_lc->restart_data = (unsigned long)data;
+ abs_lc->restart_source = source_cpu;
+ put_abs_lowcore(abs_lc, flags);
+ }
__bpon();
asm volatile(
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
@@ -572,39 +588,31 @@ static void smp_ctl_bit_callback(void *info)
}
static DEFINE_SPINLOCK(ctl_lock);
-static unsigned long ctlreg;
-/*
- * Set a bit in a control register of all cpus
- */
-void smp_ctl_set_bit(int cr, int bit)
-{
- struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
-
- spin_lock(&ctl_lock);
- memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
- __set_bit(bit, &ctlreg);
- memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
- spin_unlock(&ctl_lock);
- on_each_cpu(smp_ctl_bit_callback, &parms, 1);
-}
-EXPORT_SYMBOL(smp_ctl_set_bit);
-
-/*
- * Clear a bit in a control register of all cpus
- */
-void smp_ctl_clear_bit(int cr, int bit)
+void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
- struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
-
+ struct ec_creg_mask_parms parms = { .cr = cr, };
+ struct lowcore *abs_lc;
+ unsigned long flags;
+ u64 ctlreg;
+
+ if (set) {
+ parms.orval = 1UL << bit;
+ parms.andval = -1UL;
+ } else {
+ parms.orval = 0;
+ parms.andval = ~(1UL << bit);
+ }
spin_lock(&ctl_lock);
- memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg));
- __clear_bit(bit, &ctlreg);
- memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg));
+ abs_lc = get_abs_lowcore(&flags);
+ ctlreg = abs_lc->cregs_save_area[cr];
+ ctlreg = (ctlreg & parms.andval) | parms.orval;
+ abs_lc->cregs_save_area[cr] = ctlreg;
+ put_abs_lowcore(abs_lc, flags);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
-EXPORT_SYMBOL(smp_ctl_clear_bit);
+EXPORT_SYMBOL(smp_ctl_set_clear_bit);
#ifdef CONFIG_CRASH_DUMP
@@ -622,7 +630,7 @@ int smp_store_status(int cpu)
return -EIO;
if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
return 0;
- pa = __pa(lc->mcesad & MCESA_ORIGIN_MASK);
+ pa = lc->mcesad & MCESA_ORIGIN_MASK;
if (MACHINE_HAS_GS)
pa |= lc->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
@@ -657,42 +665,39 @@ int smp_store_status(int cpu)
* This case does not exist for s390 anymore, setup_arch explicitly
* deactivates the elfcorehdr= kernel parameter
*/
-static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, unsigned long page)
+static bool dump_available(void)
{
- __vector128 *vxrs = (__vector128 *) page;
-
- if (is_boot_cpu)
- vxrs = boot_cpu_vector_save_area;
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page);
- save_area_add_vxrs(sa, vxrs);
+ return oldmem_data.start || is_ipl_type_dump();
}
-static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, unsigned long page)
+void __init smp_save_dump_ipl_cpu(void)
{
- void *regs = (void *) page;
+ struct save_area *sa;
+ void *regs;
- if (is_boot_cpu)
- copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512);
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page);
+ if (!dump_available())
+ return;
+ sa = save_area_alloc(true);
+ regs = memblock_alloc(512, 8);
+ if (!sa || !regs)
+ panic("could not allocate memory for boot CPU save area\n");
+ copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
+ memblock_free(regs, 512);
+ if (MACHINE_HAS_VX)
+ save_area_add_vxrs(sa, boot_cpu_vector_save_area);
}
-void __init smp_save_dump_cpus(void)
+void __init smp_save_dump_secondary_cpus(void)
{
int addr, boot_cpu_addr, max_cpu_addr;
struct save_area *sa;
- unsigned long page;
- bool is_boot_cpu;
+ void *page;
- if (!(oldmem_data.start || is_ipl_type_dump()))
- /* No previous system present, normal boot. */
+ if (!dump_available())
return;
/* Allocate a page as dumping area for the store status sigps */
- page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31);
+ page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
if (!page)
panic("ERROR: Failed to allocate %lx bytes below %lx\n",
PAGE_SIZE, 1UL << 31);
@@ -702,28 +707,22 @@ void __init smp_save_dump_cpus(void)
boot_cpu_addr = stap();
max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (addr == boot_cpu_addr)
+ continue;
if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- is_boot_cpu = (addr == boot_cpu_addr);
- /* Allocate save area */
- sa = save_area_alloc(is_boot_cpu);
+ sa = save_area_alloc(false);
if (!sa)
panic("could not allocate memory for save area\n");
- if (MACHINE_HAS_VX)
- /* Get the vector registers */
- smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
- /*
- * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
- * of the boot CPU are stored in the HSA. To retrieve
- * these registers an SCLP request is required which is
- * done by drivers/s390/char/zcore.c:init_cpu_info()
- */
- if (!is_boot_cpu || oldmem_data.start)
- /* Get the CPU registers */
- smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+ save_area_add_regs(sa, page);
+ if (MACHINE_HAS_VX) {
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+ save_area_add_vxrs(sa, page);
+ }
}
- memblock_phys_free(page, PAGE_SIZE);
+ memblock_free(page, PAGE_SIZE);
diag_amode31_ops.diag308_reset();
pcpu_set_smt(0);
}
@@ -880,7 +879,7 @@ void __init smp_detect_cpus(void)
/* Add CPUs present at boot */
__smp_rescan_cpus(info, true);
- memblock_phys_free((unsigned long)info, sizeof(*info));
+ memblock_free(info, sizeof(*info));
}
/*
@@ -1257,7 +1256,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc)
src.odd = sizeof(S390_lowcore);
dst.even = (unsigned long) lc;
dst.odd = sizeof(*lc);
- pfx = (unsigned long) lc;
+ pfx = __pa(lc);
asm volatile(
" mvcl %[dst],%[src]\n"
@@ -1267,18 +1266,19 @@ static __always_inline void set_new_lowcore(struct lowcore *lc)
: "memory", "cc");
}
-static int __init smp_reinit_ipl_cpu(void)
+int __init smp_reinit_ipl_cpu(void)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc, *lc_ipl;
- unsigned long flags;
+ unsigned long flags, cr0;
+ u64 mcesad;
lc_ipl = lowcore_ptr[0];
lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
async_stack = stack_alloc();
mcck_stack = stack_alloc();
- if (!lc || !nodat_stack || !async_stack || !mcck_stack)
+ if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
panic("Couldn't allocate memory");
local_irq_save(flags);
@@ -1287,14 +1287,19 @@ static int __init smp_reinit_ipl_cpu(void)
S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
+ __ctl_store(cr0, 0, 0);
+ __ctl_clear_bit(0, 28); /* disable lowcore protection */
+ S390_lowcore.mcesad = mcesad;
+ __ctl_load(cr0, 0, 0);
+ if (abs_lowcore_map(0, lc, false))
+ panic("Couldn't remap absolute lowcore");
lowcore_ptr[0] = lc;
local_mcck_enable();
local_irq_restore(flags);
free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER);
- memblock_free_late(lc_ipl->mcck_stack - STACK_INIT_OFFSET, THREAD_SIZE);
- memblock_free_late((unsigned long) lc_ipl, sizeof(*lc_ipl));
+ memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE);
+ memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl));
return 0;
}
-early_initcall(smp_reinit_ipl_cpu);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index b7bb1981e9ee..7ee455e8e3d5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -3,7 +3,6 @@
* Stack trace management functions
*
* Copyright IBM Corp. 2006
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#include <linux/stacktrace.h>
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index b98f25029b8e..fb85e797946d 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -21,8 +21,7 @@ uapi: $(uapi-hdrs-y)
# Create output directory if not already present
-_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
- $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+$(shell mkdir -p $(uapi) $(kapi))
filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index ed9c5c2eafad..799147658dee 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -452,3 +452,4 @@
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
+450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index ef3f2659876c..b5e364358ce4 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -14,6 +14,7 @@
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/slab.h>
+#include <asm/asm-extable.h>
#include <asm/ebcdic.h>
#include <asm/debug.h>
#include <asm/sysinfo.h>
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index 868e4a604110..2c8b14cc5556 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <asm/asm-extable.h>
#include <asm/errno.h>
#include <asm/sigp.h>
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 326cb8f75f58..6b7b6d5e3632 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -364,7 +364,7 @@ static inline int check_sync_clock(void)
* Apply clock delta to the global data structures.
* This is called once on the CPU that performed the clock sync.
*/
-static void clock_sync_global(unsigned long delta)
+static void clock_sync_global(long delta)
{
unsigned long now, adj;
struct ptff_qto qto;
@@ -400,7 +400,7 @@ static void clock_sync_global(unsigned long delta)
* Apply clock delta to the per-CPU data structures of this CPU.
* This is called for each online CPU after the call to clock_sync_global.
*/
-static void clock_sync_local(unsigned long delta)
+static void clock_sync_local(long delta)
{
/* Add the delta to the clock comparator. */
if (S390_lowcore.clock_comparator != clock_comparator_max) {
@@ -424,7 +424,7 @@ static void __init time_init_wq(void)
struct clock_sync_data {
atomic_t cpus;
int in_sync;
- unsigned long clock_delta;
+ long clock_delta;
};
/*
@@ -544,7 +544,7 @@ static int stpinfo_valid(void)
static int stp_sync_clock(void *data)
{
struct clock_sync_data *sync = data;
- u64 clock_delta, flags;
+ long clock_delta, flags;
static int first;
int rc;
@@ -554,9 +554,7 @@ static int stp_sync_clock(void *data)
while (atomic_read(&sync->cpus) != 0)
cpu_relax();
rc = 0;
- if (stp_info.todoff[0] || stp_info.todoff[1] ||
- stp_info.todoff[2] || stp_info.todoff[3] ||
- stp_info.tmd != 2) {
+ if (stp_info.todoff || stp_info.tmd != 2) {
flags = vdso_update_begin();
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
&clock_delta);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 58f8291950cb..c6eecd4a5302 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2007, 2011
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#define KMSG_COMPONENT "cpu"
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 2b780786fc68..1d2aa448d103 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include <linux/cpu.h>
#include <linux/entry-common.h>
+#include <asm/asm-extable.h>
#include <asm/fpu/api.h>
#include <asm/vtime.h>
#include "entry.h"
@@ -53,9 +54,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
force_sig_fault(si_signo, si_code, get_trap_ip(regs));
report_user_fault(regs, si_signo, 0);
} else {
- const struct exception_table_entry *fixup;
- fixup = s390_search_extables(regs->psw.addr);
- if (!fixup || !ex_handle(fixup, regs))
+ if (!fixup_exception(regs))
die(regs, str);
}
}
@@ -142,10 +141,10 @@ static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
do_trap(regs, SIGFPE, si_code, "floating point exception");
}
-static void translation_exception(struct pt_regs *regs)
+static void translation_specification_exception(struct pt_regs *regs)
{
/* May never happen. */
- panic("Translation exception");
+ panic("Translation-Specification Exception");
}
static void illegal_op(struct pt_regs *regs)
@@ -244,16 +243,12 @@ static void space_switch_exception(struct pt_regs *regs)
static void monitor_event_exception(struct pt_regs *regs)
{
- const struct exception_table_entry *fixup;
-
if (user_mode(regs))
return;
switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
case BUG_TRAP_TYPE_NONE:
- fixup = s390_search_extables(regs->psw.addr);
- if (fixup)
- ex_handle(fixup, regs);
+ fixup_exception(regs);
break;
case BUG_TRAP_TYPE_WARN:
break;
@@ -291,7 +286,6 @@ static void __init test_monitor_call(void)
void __init trap_init(void)
{
- sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table);
local_mcck_enable();
test_monitor_call();
}
@@ -303,7 +297,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
unsigned int trapnr;
irqentry_state_t state;
- regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc;
+ regs->int_code = S390_lowcore.pgm_int_code;
regs->int_parm_long = S390_lowcore.trans_exc_code;
state = irqentry_enter(regs);
@@ -328,7 +322,7 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
set_thread_flag(TIF_PER_TRAP);
ev->address = S390_lowcore.per_address;
- ev->cause = *(u16 *)&S390_lowcore.per_code;
+ ev->cause = S390_lowcore.per_code_combined;
ev->paid = S390_lowcore.per_access_id;
} else {
/* PER event in kernel is kprobes */
@@ -374,7 +368,7 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
[0x0f] = hfp_divide_exception,
[0x10] = do_dat_exception,
[0x11] = do_dat_exception,
- [0x12] = translation_exception,
+ [0x12] = translation_specification_exception,
[0x13] = special_op_exception,
[0x14] = default_trap_handler,
[0x15] = operand_exception,
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 707fd99f6734..0ece156fdd7c 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -64,8 +64,8 @@ bool unwind_next_frame(struct unwind_state *state)
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
regs = NULL;
- if (!__kernel_text_address(ip)) {
- /* skip bogus %r14 */
+ /* skip bogus %r14 or if is the same as regs->psw.addr */
+ if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) {
state->regs = NULL;
return unwind_next_frame(state);
}
@@ -103,13 +103,11 @@ bool unwind_next_frame(struct unwind_state *state)
if (sp & 0x7)
goto out_err;
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
-
/* Update unwind state */
state->sp = sp;
- state->ip = ip;
state->regs = regs;
state->reliable = reliable;
+ state->ip = unwind_recover_ret_addr(state, ip);
return true;
out_err:
@@ -161,12 +159,10 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
}
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
-
/* Update unwind state */
state->sp = sp;
- state->ip = ip;
state->reliable = true;
+ state->ip = unwind_recover_ret_addr(state, ip);
if (!first_frame)
return;
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index bd3ef121c379..b88345ef8bd9 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -177,9 +177,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len)
__typeof__(*(ptr)) input; \
int __rc = 0; \
\
- if (!test_facility(34)) \
- __rc = EMU_ILLEGAL_OP; \
- else if ((u64 __force)ptr & mask) \
+ if ((u64 __force)ptr & mask) \
__rc = EMU_SPECIFICATION; \
else if (get_user(input, ptr)) \
__rc = EMU_ADDRESSING; \
@@ -194,9 +192,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len)
__typeof__(ptr) __ptr = (ptr); \
int __rc = 0; \
\
- if (!test_facility(34)) \
- __rc = EMU_ILLEGAL_OP; \
- else if ((u64 __force)__ptr & mask) \
+ if ((u64 __force)__ptr & mask) \
__rc = EMU_SPECIFICATION; \
else if (put_user(*(input), __ptr)) \
__rc = EMU_ADDRESSING; \
@@ -213,9 +209,7 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len)
__typeof__(*(ptr)) input; \
int __rc = 0; \
\
- if (!test_facility(34)) \
- __rc = EMU_ILLEGAL_OP; \
- else if ((u64 __force)ptr & mask) \
+ if ((u64 __force)ptr & mask) \
__rc = EMU_SPECIFICATION; \
else if (get_user(input, ptr)) \
__rc = EMU_ADDRESSING; \
@@ -327,10 +321,6 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
break;
case 0xc6:
switch (insn->opc1) {
- case 0x02: /* pfdrl */
- if (!test_facility(34))
- rc = EMU_ILLEGAL_OP;
- break;
case 0x04: /* cghrl */
rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
break;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 386d4e42b8d3..f9810d2a267c 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -30,7 +30,7 @@ int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
EXPORT_SYMBOL(uv_info);
-static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
{
struct uv_cb_init uvcb = {
.header.cmd = UVC_CMD_INIT_UV,
@@ -49,12 +49,12 @@ static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
void __init setup_uv(void)
{
- unsigned long uv_stor_base;
+ void *uv_stor_base;
if (!is_prot_virt_host())
return;
- uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+ uv_stor_base = memblock_alloc_try_nid(
uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
if (!uv_stor_base) {
@@ -63,8 +63,8 @@ void __init setup_uv(void)
goto fail;
}
- if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
- memblock_phys_free(uv_stor_base, uv_info.uv_base_stor_len);
+ if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) {
+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
goto fail;
}
@@ -234,6 +234,32 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
/*
* Requests the Ultravisor to make a page accessible to a guest.
* If it's brought in the first time, it will be cleared. If
@@ -277,6 +303,8 @@ again:
lock_page(page);
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ if (should_export_before_import(uvcb, gmap->mm))
+ uv_convert_from_secure(page_to_phys(page));
rc = make_secure_pte(ptep, uaddr, page, uvcb);
pte_unmap_unlock(ptep, ptelock);
unlock_page(page);
@@ -334,6 +362,61 @@ int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
}
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+/**
+ * gmap_destroy_page - Destroy a guest page.
+ * @gmap: the gmap of the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ */
+int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+{
+ struct vm_area_struct *vma;
+ unsigned long uaddr;
+ struct page *page;
+ int rc;
+
+ rc = -EFAULT;
+ mmap_read_lock(gmap->mm);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = vma_lookup(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Huge pages should not be able to become secure
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = 0;
+ /* we take an extra reference here */
+ page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+ rc = uv_destroy_owned_page(page_to_phys(page));
+ /*
+ * Fault handlers can race; it is possible that two CPUs will fault
+ * on the same secure page. One CPU can destroy the page, reboot,
+ * re-enter secure mode and import it, while the second CPU was
+ * stuck at the beginning of the handler. At some point the second
+ * CPU will be able to progress, and it will not be able to destroy
+ * the page. In that case we do not want to terminate the process,
+ * we instead try to export the page.
+ */
+ if (rc)
+ rc = uv_convert_owned_from_secure(page_to_phys(page));
+ put_page(page);
+out:
+ mmap_read_unlock(gmap->mm);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_destroy_page);
+
/*
* To be called with the page locked or with an extra reference! This will
* prevent gmap_make_secure from touching the page concurrently. Having 2
@@ -392,6 +475,54 @@ static ssize_t uv_query_facilities(struct kobject *kobj,
static struct kobj_attribute uv_query_facilities_attr =
__ATTR(facilities, 0444, uv_query_facilities, NULL);
+static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
+ __ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
+ __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
+
+static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.guest_cpu_stor_len);
+}
+
+static struct kobj_attribute uv_query_dump_cpu_len_attr =
+ __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
+
+static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.conf_dump_storage_state_len);
+}
+
+static struct kobj_attribute uv_query_dump_storage_state_len_attr =
+ __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
+
+static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.conf_dump_finalize_len);
+}
+
+static struct kobj_attribute uv_query_dump_finalize_len_attr =
+ __ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
+
static ssize_t uv_query_feature_indications(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -431,12 +562,37 @@ static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
static struct kobj_attribute uv_query_max_guest_addr_attr =
__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
+ __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
+}
+
+static struct kobj_attribute uv_query_supp_att_pflags_attr =
+ __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
&uv_query_feature_indications_attr.attr,
&uv_query_max_guest_cpus_attr.attr,
&uv_query_max_guest_vms_attr.attr,
&uv_query_max_guest_addr_attr.attr,
+ &uv_query_supp_se_hdr_ver_attr.attr,
+ &uv_query_supp_se_hdr_pcf_attr.attr,
+ &uv_query_dump_storage_state_len_attr.attr,
+ &uv_query_dump_finalize_len_attr.attr,
+ &uv_query_dump_cpu_len_attr.attr,
+ &uv_query_supp_att_req_hdr_ver_attr.attr,
+ &uv_query_supp_att_pflags_attr.attr,
NULL,
};
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 99694260cac9..3105ca5bd470 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
+#include <linux/random.h>
#include <vdso/datapage.h>
#include <asm/vdso.h>
@@ -68,10 +69,11 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
mmap_read_lock(mm);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
unsigned long size = vma->vm_end - vma->vm_start;
if (!vma_is_special_mapping(vma, &vvar_mapping))
@@ -160,10 +162,9 @@ int vdso_getcpu_init(void)
}
early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
{
- unsigned long vdso_text_len, vdso_mapping_len;
- unsigned long vvar_start, vdso_text_start;
+ unsigned long vvar_start, vdso_text_start, vdso_text_len;
struct vm_special_mapping *vdso_mapping;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -180,8 +181,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
vdso_text_len = vdso64_end - vdso64_start;
vdso_mapping = &vdso64_mapping;
}
- vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
- vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
rc = vvar_start;
if (IS_ERR_VALUE(vvar_start))
goto out;
@@ -210,6 +210,52 @@ out:
return rc;
}
+static unsigned long vdso_addr(unsigned long start, unsigned long len)
+{
+ unsigned long addr, end, offset;
+
+ /*
+ * Round up the start address. It can start out unaligned as a result
+ * of stack start randomization.
+ */
+ start = PAGE_ALIGN(start);
+
+ /* Round the lowest possible end address up to a PMD boundary. */
+ end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+ if (end >= VDSO_BASE)
+ end = VDSO_BASE;
+ end -= len;
+
+ if (end > start) {
+ offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1);
+ addr = start + (offset << PAGE_SHIFT);
+ } else {
+ addr = start;
+ }
+ return addr;
+}
+
+unsigned long vdso_size(void)
+{
+ unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+
+ if (is_compat_task())
+ size += vdso32_end - vdso32_start;
+ else
+ size += vdso64_end - vdso64_start;
+ return PAGE_ALIGN(size);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ unsigned long addr = VDSO_BASE;
+ unsigned long size = vdso_size();
+
+ if (current->flags & PF_RANDOMIZE)
+ addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
+ return map_vdso(addr, size);
+}
+
static struct page ** __init vdso_setup_pages(void *start, void *end)
{
int pages = (end - start) >> PAGE_SHIFT;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 42c43521878f..5ea3830af0cc 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -49,7 +49,6 @@ SECTIONS
SOFTIRQENTRY_TEXT
FTRACE_HOTPATCH_TRAMPOLINES_TEXT
*(.text.*_indirect_*)
- *(.fixup)
*(.gnu.warning)
. = ALIGN(PAGE_SIZE);
_etext = .; /* End of text section */
@@ -132,6 +131,7 @@ SECTIONS
/*
* Table with the patch locations to undo expolines
*/
+ . = ALIGN(4);
.nospec_call_table : {
__nospec_call_start = . ;
*(.s390_indirect*)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index f216a1b2f825..9436f3053b88 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -128,13 +128,12 @@ static int do_account_vtime(struct task_struct *tsk)
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
- /* Use STORE CLOCK by default, STORE CLOCK FAST if available. */
- alternative_io("stpt %0\n .insn s,0xb2050000,%1\n",
- "stpt %0\n .insn s,0xb27c0000,%1\n",
- 25,
- ASM_OUTPUT2("=Q" (S390_lowcore.last_update_timer),
- "=Q" (S390_lowcore.last_update_clock)),
- ASM_NO_INPUT_CLOBBER("cc"));
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+ " stckf %1" /* Store current tod clock value */
+ : "=Q" (S390_lowcore.last_update_timer),
+ "=Q" (S390_lowcore.last_update_clock)
+ : : "cc");
clock = S390_lowcore.last_update_clock - clock;
timer -= S390_lowcore.last_update_timer;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 67a8e770e369..33f4ff909476 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -33,6 +33,8 @@ config KVM
select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
+ select INTERVAL_TREE
+ select MMU_NOTIFIER
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3aaadc60ead..02217fb4ae10 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -3,13 +3,12 @@
#
# Copyright IBM Corp. 2008
-KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o \
- $(KVM)/irqchip.o $(KVM)/vfio.o $(KVM)/binary_stats.o
+include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6af59c59cc1b..0243b6e38d36 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -10,6 +10,7 @@
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
+#include <linux/bitfield.h>
#include <asm/gmap.h>
#include "kvm-s390.h"
@@ -261,77 +262,77 @@ struct aste {
/* .. more fields there */
};
-int ipte_lock_held(struct kvm_vcpu *vcpu)
+int ipte_lock_held(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII) {
+ if (sclp.has_siif) {
int rc;
- read_lock(&vcpu->kvm->arch.sca_lock);
- rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_lock(&kvm->arch.sca_lock);
+ rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
+ read_unlock(&kvm->arch.sca_lock);
return rc;
}
- return vcpu->kvm->arch.ipte_lock_count != 0;
+ return kvm->arch.ipte_lock_count != 0;
}
-static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+static void ipte_lock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count++;
- if (vcpu->kvm->arch.ipte_lock_count > 1)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count++;
+ if (kvm->arch.ipte_lock_count > 1)
goto out;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
if (old.k) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+static void ipte_unlock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count--;
- if (vcpu->kvm->arch.ipte_lock_count)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count--;
+ if (kvm->arch.ipte_lock_count)
goto out;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
new = old;
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ read_unlock(&kvm->arch.sca_lock);
+ wake_up(&kvm->arch.ipte_wq);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+static void ipte_lock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
if (old.kg) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
@@ -339,15 +340,15 @@ retry:
new.k = 1;
new.kh++;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
}
-static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+static void ipte_unlock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
do {
old = READ_ONCE(*ic);
new = old;
@@ -355,25 +356,25 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
if (!new.kh)
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
if (!new.kh)
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ wake_up(&kvm->arch.ipte_wq);
}
-void ipte_lock(struct kvm_vcpu *vcpu)
+void ipte_lock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_lock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_lock_siif(kvm);
else
- ipte_lock_simple(vcpu);
+ ipte_lock_simple(kvm);
}
-void ipte_unlock(struct kvm_vcpu *vcpu)
+void ipte_unlock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_unlock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_unlock_siif(kvm);
else
- ipte_unlock_simple(vcpu);
+ ipte_unlock_simple(kvm);
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
@@ -488,10 +489,12 @@ enum prot_type {
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
+ /* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+ PROT_NONE,
};
-static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
- u8 ar, enum gacc_mode mode, enum prot_type prot)
+static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
+ enum gacc_mode mode, enum prot_type prot, bool terminate)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
struct trans_exc_code_bits *tec;
@@ -503,6 +506,10 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_NONE:
+ /* We should never get here, acts like termination */
+ WARN_ON_ONCE(1);
+ break;
case PROT_TYPE_IEP:
tec->b61 = 1;
fallthrough;
@@ -519,6 +526,11 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
tec->b61 = 1;
break;
}
+ if (terminate) {
+ tec->b56 = 0;
+ tec->b60 = 0;
+ tec->b61 = 0;
+ }
fallthrough;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
@@ -551,6 +563,12 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
return code;
}
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
+ enum gacc_mode mode, enum prot_type prot)
+{
+ return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
+}
+
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
unsigned long ga, u8 ar, enum gacc_mode mode)
{
@@ -794,48 +812,270 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
- unsigned long *pages, unsigned long nr_pages,
- const union asce asce, enum gacc_mode mode)
+static int vm_check_access_key(struct kvm *kvm, u8 access_key,
+ enum gacc_mode mode, gpa_t gpa)
+{
+ u8 storage_key, access_control;
+ bool fetch_protected;
+ unsigned long hva;
+ int r;
+
+ if (access_key == 0)
+ return 0;
+
+ hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+
+ mmap_read_lock(current->mm);
+ r = get_guest_storage_key(current->mm, hva, &storage_key);
+ mmap_read_unlock(current->mm);
+ if (r)
+ return r;
+ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+ if (access_control == access_key)
+ return 0;
+ fetch_protected = storage_key & _PAGE_FP_BIT;
+ if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
+ return 0;
+ return PGM_PROTECTION;
+}
+
+static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
+ union asce asce)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long override;
+
+ if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+ /* check if fetch protection override enabled */
+ override = vcpu->arch.sie_block->gcr[0];
+ override &= CR0_FETCH_PROTECTION_OVERRIDE;
+ /* not applicable if subject to DAT && private space */
+ override = override && !(psw_bits(*psw).dat && asce.p);
+ return override;
+ }
+ return false;
+}
+
+static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
+{
+ return ga < 2048 && ga + len <= 2048;
+}
+
+static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
+{
+ /* check if storage protection override enabled */
+ return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
+}
+
+static bool storage_prot_override_applies(u8 access_control)
+{
+ /* matches special storage protection override key (9) -> allow */
+ return access_control == PAGE_SPO_ACC;
+}
+
+static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
+ enum gacc_mode mode, union asce asce, gpa_t gpa,
+ unsigned long ga, unsigned int len)
+{
+ u8 storage_key, access_control;
+ unsigned long hva;
+ int r;
+
+ /* access key 0 matches any storage key -> allow */
+ if (access_key == 0)
+ return 0;
+ /*
+ * caller needs to ensure that gfn is accessible, so we can
+ * assume that this cannot fail
+ */
+ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
+ mmap_read_lock(current->mm);
+ r = get_guest_storage_key(current->mm, hva, &storage_key);
+ mmap_read_unlock(current->mm);
+ if (r)
+ return r;
+ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
+ /* access key matches storage key -> allow */
+ if (access_control == access_key)
+ return 0;
+ if (mode == GACC_FETCH || mode == GACC_IFETCH) {
+ /* it is a fetch and fetch protection is off -> allow */
+ if (!(storage_key & _PAGE_FP_BIT))
+ return 0;
+ if (fetch_prot_override_applicable(vcpu, mode, asce) &&
+ fetch_prot_override_applies(ga, len))
+ return 0;
+ }
+ if (storage_prot_override_applicable(vcpu) &&
+ storage_prot_override_applies(access_control))
+ return 0;
+ return PGM_PROTECTION;
+}
+
+/**
+ * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
+ * covering a logical range
+ * @vcpu: virtual cpu
+ * @ga: guest address, start of range
+ * @ar: access register
+ * @gpas: output argument, may be NULL
+ * @len: length of range in bytes
+ * @asce: address-space-control element to use for translation
+ * @mode: access mode
+ * @access_key: access key to mach the range's storage keys against
+ *
+ * Translate a logical range to a series of guest absolute addresses,
+ * such that the concatenation of page fragments starting at each gpa make up
+ * the whole range.
+ * The translation is performed as if done by the cpu for the given @asce, @ar,
+ * @mode and state of the @vcpu.
+ * If the translation causes an exception, its program interruption code is
+ * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
+ * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
+ * a correct exception into the guest.
+ * The resulting gpas are stored into @gpas, unless it is NULL.
+ *
+ * Note: All fragments except the first one start at the beginning of a page.
+ * When deriving the boundaries of a fragment from a gpa, all but the last
+ * fragment end at the end of the page.
+ *
+ * Return:
+ * * 0 - success
+ * * <0 - translation could not be performed, for example if guest
+ * memory could not be accessed
+ * * >0 - an access exception occurred. In this case the returned value
+ * is the program interruption code and the contents of pgm may
+ * be used to inject an exception into the guest.
+ */
+static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ unsigned long *gpas, unsigned long len,
+ const union asce asce, enum gacc_mode mode,
+ u8 access_key)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned int offset = offset_in_page(ga);
+ unsigned int fragment_len;
int lap_enabled, rc = 0;
enum prot_type prot;
+ unsigned long gpa;
lap_enabled = low_address_protection_enabled(vcpu, asce);
- while (nr_pages) {
+ while (min(PAGE_SIZE - offset, len) > 0) {
+ fragment_len = min(PAGE_SIZE - offset, len);
ga = kvm_s390_logical_to_effective(vcpu, ga);
if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
PROT_TYPE_LA);
- ga &= PAGE_MASK;
if (psw_bits(*psw).dat) {
- rc = guest_translate(vcpu, ga, pages, asce, mode, &prot);
+ rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
if (rc < 0)
return rc;
} else {
- *pages = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ gpa = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
+ prot = PROT_NONE;
+ }
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
- ga += PAGE_SIZE;
- pages++;
- nr_pages--;
+ rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
+ fragment_len);
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
+ if (gpas)
+ *gpas++ = gpa;
+ offset = 0;
+ ga += fragment_len;
+ len -= fragment_len;
}
return 0;
}
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
- unsigned long len, enum gacc_mode mode)
+static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+ void *data, unsigned int len)
+{
+ const unsigned int offset = offset_in_page(gpa);
+ const gfn_t gfn = gpa_to_gfn(gpa);
+ int rc;
+
+ if (mode == GACC_STORE)
+ rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
+ else
+ rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
+ return rc;
+}
+
+static int
+access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
+ void *data, unsigned int len, u8 access_key)
+{
+ struct kvm_memory_slot *slot;
+ bool writable;
+ gfn_t gfn;
+ hva_t hva;
+ int rc;
+
+ gfn = gpa >> PAGE_SHIFT;
+ slot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a ro memslot, even tho that can't occur (they're unsupported).
+ * Don't try to actually handle that case.
+ */
+ if (!writable && mode == GACC_STORE)
+ return -EOPNOTSUPP;
+ hva += offset_in_page(gpa);
+ if (mode == GACC_STORE)
+ rc = copy_to_user_key((void __user *)hva, data, len, access_key);
+ else
+ rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
+ if (rc)
+ return PGM_PROTECTION;
+ if (mode == GACC_STORE)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ return 0;
+}
+
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len, enum gacc_mode mode, u8 access_key)
+{
+ int offset = offset_in_page(gpa);
+ int fragment_len;
+ int rc;
+
+ while (min(PAGE_SIZE - offset, len) > 0) {
+ fragment_len = min(PAGE_SIZE - offset, len);
+ rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
+ if (rc)
+ return rc;
+ offset = 0;
+ len -= fragment_len;
+ data += fragment_len;
+ gpa += fragment_len;
+ }
+ return 0;
+}
+
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, enum gacc_mode mode,
+ u8 access_key)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- unsigned long _len, nr_pages, gpa, idx;
- unsigned long pages_array[2];
- unsigned long *pages;
+ unsigned long nr_pages, idx;
+ unsigned long gpa_array[2];
+ unsigned int fragment_len;
+ unsigned long *gpas;
+ enum prot_type prot;
int need_ipte_lock;
union asce asce;
+ bool try_storage_prot_override;
+ bool try_fetch_prot_override;
int rc;
if (!len)
@@ -845,60 +1085,90 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
- pages = pages_array;
- if (nr_pages > ARRAY_SIZE(pages_array))
- pages = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
- if (!pages)
+ gpas = gpa_array;
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
+ if (!gpas)
return -ENOMEM;
+ try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
+ try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
- ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
- for (idx = 0; idx < nr_pages && !rc; idx++) {
- gpa = *(pages + idx) + (ga & ~PAGE_MASK);
- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (mode == GACC_STORE)
- rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+ ipte_lock(vcpu->kvm);
+ /*
+ * Since we do the access further down ultimately via a move instruction
+ * that does key checking and returns an error in case of a protection
+ * violation, we don't need to do the check during address translation.
+ * Skip it by passing access key 0, which matches any storage key,
+ * obviating the need for any further checks. As a result the check is
+ * handled entirely in hardware on access, we only need to take care to
+ * forego key protection checking if fetch protection override applies or
+ * retry with the special key 9 in case of storage protection override.
+ */
+ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
+ if (rc)
+ goto out_unlock;
+ for (idx = 0; idx < nr_pages; idx++) {
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+ if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
+ rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len);
+ } else {
+ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len, access_key);
+ }
+ if (rc == PGM_PROTECTION && try_storage_prot_override)
+ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
+ data, fragment_len, PAGE_SPO_ACC);
+ if (rc)
+ break;
+ len -= fragment_len;
+ data += fragment_len;
+ ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
+ }
+ if (rc > 0) {
+ bool terminate = (mode == GACC_STORE) && (idx > 0);
+
+ if (rc == PGM_PROTECTION)
+ prot = PROT_TYPE_KEYC;
else
- rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
- len -= _len;
- ga += _len;
- data += _len;
+ prot = PROT_NONE;
+ rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
+out_unlock:
if (need_ipte_lock)
- ipte_unlock(vcpu);
- if (nr_pages > ARRAY_SIZE(pages_array))
- vfree(pages);
+ ipte_unlock(vcpu->kvm);
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+ vfree(gpas);
return rc;
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode)
{
- unsigned long _len, gpa;
+ unsigned int fragment_len;
+ unsigned long gpa;
int rc = 0;
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (mode)
- rc = write_guest_abs(vcpu, gpa, data, _len);
- else
- rc = read_guest_abs(vcpu, gpa, data, _len);
- len -= _len;
- gra += _len;
- data += _len;
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
+ rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
+ len -= fragment_len;
+ gra += fragment_len;
+ data += fragment_len;
}
return rc;
}
/**
- * guest_translate_address - translate guest logical into guest absolute address
+ * guest_translate_address_with_key - translate guest logical into guest absolute address
* @vcpu: virtual cpu
* @gva: Guest virtual address
* @ar: Access register
* @gpa: Guest physical address
* @mode: Translation access mode
+ * @access_key: access key to mach the storage key with
*
* Parameter semantics are the same as the ones from guest_translate.
* The memory contents at the guest address are not changed.
@@ -906,11 +1176,10 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* Note: The IPTE lock is not taken during this function, so the caller
* has to take care of this.
*/
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long *gpa, enum gacc_mode mode)
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode,
+ u8 access_key)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- enum prot_type prot;
union asce asce;
int rc;
@@ -918,23 +1187,8 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE)
- return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
- mode, PROT_TYPE_LA);
- }
-
- if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot);
- if (rc > 0)
- return trans_exc(vcpu, rc, gva, 0, mode, prot);
- } else {
- *gpa = kvm_s390_real_to_abs(vcpu, gva);
- if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
- }
-
- return rc;
+ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
+ access_key);
}
/**
@@ -944,23 +1198,45 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
* @ar: Access register
* @length: Length of test range
* @mode: Translation access mode
+ * @access_key: access key to mach the storage keys with
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long length, enum gacc_mode mode)
+ unsigned long length, enum gacc_mode mode, u8 access_key)
{
- unsigned long gpa;
- unsigned long currlen;
+ union asce asce;
int rc = 0;
- ipte_lock(vcpu);
- while (length > 0 && !rc) {
- currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
- gva += currlen;
- length -= currlen;
- }
- ipte_unlock(vcpu);
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
+ ipte_lock(vcpu->kvm);
+ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
+ access_key);
+ ipte_unlock(vcpu->kvm);
+
+ return rc;
+}
+/**
+ * check_gpa_range - test a range of guest physical addresses for accessibility
+ * @kvm: virtual machine instance
+ * @gpa: guest physical address
+ * @length: length of test range
+ * @mode: access mode to test, relevant for storage keys
+ * @access_key: access key to mach the storage keys with
+ */
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+ enum gacc_mode mode, u8 access_key)
+{
+ unsigned int fragment_len;
+ int rc = 0;
+
+ while (length && !rc) {
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
+ rc = vm_check_access_key(kvm, access_key, mode, gpa);
+ length -= fragment_len;
+ gpa += fragment_len;
+ }
return rc;
}
@@ -1199,7 +1475,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
* tables/pointers we read stay valid - unshadowing is however
* always possible - only guest_table_lock protects us.
*/
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
@@ -1233,7 +1509,7 @@ shadow_page:
pte.p |= dat_protection;
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
mmap_read_unlock(sg->mm);
return rc;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 7c72a5e3449f..9408d6cc8e2c 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -186,24 +186,34 @@ enum gacc_mode {
GACC_IFETCH,
};
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
- u8 ar, unsigned long *gpa, enum gacc_mode mode);
+int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+ unsigned long *gpa, enum gacc_mode mode,
+ u8 access_key);
+
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
- unsigned long length, enum gacc_mode mode);
+ unsigned long length, enum gacc_mode mode, u8 access_key);
+
+int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
+ enum gacc_mode mode, u8 access_key);
+
+int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
+ unsigned long len, enum gacc_mode mode, u8 access_key);
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
- unsigned long len, enum gacc_mode mode);
+int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, enum gacc_mode mode,
+ u8 access_key);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode);
/**
- * write_guest - copy data from kernel space to guest space
+ * write_guest_with_key - copy data from kernel space to guest space
* @vcpu: virtual cpu
* @ga: guest address
* @ar: access register
* @data: source address in kernel space
* @len: number of bytes to copy
+ * @access_key: access key the storage key needs to match
*
* Copy @len bytes from @data (kernel space) to @ga (guest address).
* In order to copy data to guest space the PSW of the vcpu is inspected:
@@ -214,8 +224,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* The addressing mode of the PSW is also inspected, so that address wrap
* around is taken into account for 24-, 31- and 64-bit addressing mode,
* if the to be copied data crosses page boundaries in guest address space.
- * In addition also low address and DAT protection are inspected before
- * copying any data (key protection is currently not implemented).
+ * In addition low address, DAT and key protection checks are performed before
+ * copying any data.
*
* This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
* In case of an access exception (e.g. protection exception) pgm will contain
@@ -243,10 +253,53 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* if data has been changed in guest space in case of an exception.
*/
static inline __must_check
+int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, u8 access_key)
+{
+ return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE,
+ access_key);
+}
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * The behaviour of write_guest is identical to write_guest_with_key, except
+ * that the PSW access key is used instead of an explicit argument.
+ */
+static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
+ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return write_guest_with_key(vcpu, ga, ar, data, len, access_key);
+}
+
+/**
+ * read_guest_with_key - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ * @access_key: access key the storage key needs to match
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest_with_key is identical to write_guest_with_key,
+ * except that data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
+ void *data, unsigned long len, u8 access_key)
+{
+ return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH,
+ access_key);
}
/**
@@ -259,14 +312,16 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
*
* Copy @len bytes from @ga (guest address) to @data (kernel space).
*
- * The behaviour of read_guest is identical to write_guest, except that
- * data will be copied from guest space to kernel space.
+ * The behaviour of read_guest is identical to read_guest_with_key, except
+ * that the PSW access key is used instead of an explicit argument.
*/
static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
+ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return read_guest_with_key(vcpu, ga, ar, data, len, access_key);
}
/**
@@ -287,7 +342,10 @@ static inline __must_check
int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH);
+ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
+
+ return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH,
+ access_key);
}
/**
@@ -382,9 +440,9 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return access_guest_real(vcpu, gra, data, len, 0);
}
-void ipte_lock(struct kvm_vcpu *vcpu);
-void ipte_unlock(struct kvm_vcpu *vcpu);
-int ipte_lock_held(struct kvm_vcpu *vcpu);
+void ipte_lock(struct kvm *kvm);
+void ipte_unlock(struct kvm *kvm);
+int ipte_lock_held(struct kvm *kvm);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
/* MVPG PEI indication bits */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index d07ff646d844..88112065d941 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -331,18 +331,18 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- /* Make sure that the source is paged-in */
- rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
- reg2, &srcaddr, GACC_FETCH);
+ /* Ensure that the source is paged-in, no actual access -> no key checking */
+ rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2],
+ reg2, &srcaddr, GACC_FETCH, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
if (rc != 0)
return rc;
- /* Make sure that the destination is paged-in */
- rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
- reg1, &dstaddr, GACC_STORE);
+ /* Ensure that the source is paged-in, no actual access -> no key checking */
+ rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1],
+ reg1, &dstaddr, GACC_STORE, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
static int handle_pv_notification(struct kvm_vcpu *vcpu)
{
+ int ret;
+
if (vcpu->arch.sie_block->ipa == 0xb210)
return handle_pv_spx(vcpu);
if (vcpu->arch.sie_block->ipa == 0xb220)
return handle_pv_sclp(vcpu);
if (vcpu->arch.sie_block->ipa == 0xb9a4)
return handle_pv_uvc(vcpu);
+ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
+ /*
+ * Besides external call, other SIGP orders also cause a
+ * 108 (pv notify) intercept. In contrast to external call,
+ * these orders need to be emulated and hence the appropriate
+ * place to handle them is in handle_instruction().
+ * So first try kvm_s390_handle_sigp_pei() and if that isn't
+ * successful, go on with handle_instruction().
+ */
+ ret = kvm_s390_handle_sigp_pei(vcpu);
+ if (!ret)
+ return ret;
+ }
return handle_instruction(vcpu);
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c3bd993fdd0c..ab569faf0df2 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,9 +28,11 @@
#include <asm/switch_to.h>
#include <asm/nmi.h>
#include <asm/airq.h>
+#include <asm/tpi.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
+#include "pci.h"
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
@@ -702,7 +704,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
/*
* We indicate floating repressible conditions along with
* other pending conditions. Channel Report Pending and Channel
- * Subsystem damage are the only two and and are indicated by
+ * Subsystem damage are the only two and are indicated by
* bits in mcic and masked in cr14.
*/
if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
@@ -1334,10 +1336,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
hrtimer_start(&vcpu->arch.ckc_timer, sltime, HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ kvm_vcpu_halt(vcpu);
+ vcpu->valid_wakeup = false;
__unset_cpu_idle(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
hrtimer_cancel(&vcpu->arch.ckc_timer);
return 0;
@@ -1900,13 +1903,12 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
isc = int_word_to_isc(inti->io.io_int_word);
/*
- * Do not make use of gisa in protected mode. We do not use the lock
- * checking variant as this is just a performance optimization and we
- * do not hold the lock here. This is ok as the code will pick
- * interrupts from both "lists" for delivery.
+ * We do not use the lock checking variant as this is just a
+ * performance optimization and we do not hold the lock here.
+ * This is ok as the code will pick interrupts from both "lists"
+ * for delivery.
*/
- if (!kvm_s390_pv_get_handle(kvm) &&
- gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti);
@@ -2115,6 +2117,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
}
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+}
+
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -2659,7 +2668,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
switch (attr->group) {
@@ -3163,9 +3172,33 @@ void kvm_s390_gisa_init(struct kvm *kvm)
VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
}
+void kvm_s390_gisa_enable(struct kvm *kvm)
+{
+ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+ u32 gisa_desc;
+
+ if (gi->origin)
+ return;
+ kvm_s390_gisa_init(kvm);
+ gisa_desc = kvm_s390_get_gisa_desc(kvm);
+ if (!gisa_desc)
+ return;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ vcpu->arch.sie_block->gd = gisa_desc;
+ vcpu->arch.sie_block->eca |= ECA_AIV;
+ VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+ vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+ mutex_unlock(&vcpu->mutex);
+ }
+}
+
void kvm_s390_gisa_destroy(struct kvm *kvm)
{
struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+ struct kvm_s390_gisa *gisa = gi->origin;
if (!gi->origin)
return;
@@ -3176,6 +3209,25 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
cpu_relax();
hrtimer_cancel(&gi->timer);
gi->origin = NULL;
+ VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+}
+
+void kvm_s390_gisa_disable(struct kvm *kvm)
+{
+ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ if (!gi->origin)
+ return;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ vcpu->arch.sie_block->eca &= ~ECA_AIV;
+ vcpu->arch.sie_block->gd = 0U;
+ mutex_unlock(&vcpu->mutex);
+ VCPU_EVENT(vcpu, 3, "AIV disabled for cpu %03u", vcpu->vcpu_id);
+ }
+ kvm_s390_gisa_destroy(kvm);
}
/**
@@ -3261,10 +3313,87 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
-static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
+static void aen_host_forward(unsigned long si)
+{
+ struct kvm_s390_gisa_interrupt *gi;
+ struct zpci_gaite *gaite;
+ struct kvm *kvm;
+
+ gaite = (struct zpci_gaite *)aift->gait +
+ (si * sizeof(struct zpci_gaite));
+ if (gaite->count == 0)
+ return;
+ if (gaite->aisb != 0)
+ set_bit_inv(gaite->aisbo, phys_to_virt(gaite->aisb));
+
+ kvm = kvm_s390_pci_si_to_kvm(aift, si);
+ if (!kvm)
+ return;
+ gi = &kvm->arch.gisa_int;
+
+ if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
+ !(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
+ gisa_set_ipm_gisc(gi->origin, gaite->gisc);
+ if (hrtimer_active(&gi->timer))
+ hrtimer_cancel(&gi->timer);
+ hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
+ kvm->stat.aen_forward++;
+ }
+}
+
+static void aen_process_gait(u8 isc)
+{
+ bool found = false, first = true;
+ union zpci_sic_iib iib = {{0}};
+ unsigned long si, flags;
+
+ spin_lock_irqsave(&aift->gait_lock, flags);
+
+ if (!aift->gait) {
+ spin_unlock_irqrestore(&aift->gait_lock, flags);
+ return;
+ }
+
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
+ if (si == -1UL) {
+ if (first || found) {
+ /* Re-enable interrupts. */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
+ &iib);
+ first = found = false;
+ } else {
+ /* Interrupts on and all bits processed */
+ break;
+ }
+ found = false;
+ si = 0;
+ /* Scan again after re-enabling interrupts */
+ continue;
+ }
+ found = true;
+ aen_host_forward(si);
+ }
+
+ spin_unlock_irqrestore(&aift->gait_lock, flags);
+}
+
+static void gib_alert_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;
+
inc_irq_stat(IRQIO_GAL);
- process_gib_alert_list();
+
+ if ((info->forward || info->error) &&
+ IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+ aen_process_gait(info->isc);
+ if (info->aism != 0)
+ process_gib_alert_list();
+ } else {
+ process_gib_alert_list();
+ }
}
static struct airq_struct gib_alert_irq = {
@@ -3276,6 +3405,11 @@ void kvm_s390_gib_destroy(void)
{
if (!gib)
return;
+ if (kvm_s390_pci_interp_allowed() && aift) {
+ mutex_lock(&aift->aift_lock);
+ kvm_s390_pci_aen_exit();
+ mutex_unlock(&aift->aift_lock);
+ }
chsc_sgib(0);
unregister_adapter_interrupt(&gib_alert_irq);
free_page((unsigned long)gib);
@@ -3313,6 +3447,14 @@ int kvm_s390_gib_init(u8 nisc)
goto out_unreg_gal;
}
+ if (kvm_s390_pci_interp_allowed()) {
+ if (kvm_s390_pci_aen_init(nisc)) {
+ pr_err("Initializing AEN for PCI failed\n");
+ rc = -EIO;
+ goto out_unreg_gal;
+ }
+ }
+
KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
goto out;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 14a18ba5ff2c..bc491a73815c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -6,7 +6,6 @@
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
- * Heiko Carstens <heiko.carstens@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
* Jason J. Herne <jjherne@us.ibm.com>
*/
@@ -32,6 +31,7 @@
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
+#include <linux/mmu_notifier.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -48,6 +48,7 @@
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include "pci.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -64,7 +65,8 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_COUNTER(VM, inject_float_mchk),
STATS_DESC_COUNTER(VM, inject_pfault_done),
STATS_DESC_COUNTER(VM, inject_service_signal),
- STATS_DESC_COUNTER(VM, inject_virtio)
+ STATS_DESC_COUNTER(VM, inject_virtio),
+ STATS_DESC_COUNTER(VM, aen_forward)
};
const struct kvm_stats_header kvm_vm_stats_header = {
@@ -295,7 +297,7 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
unsigned long long *delta = v;
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -503,6 +505,14 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+ rc = kvm_s390_pci_init();
+ if (rc) {
+ pr_err("Unable to allocate AIFT for PCI\n");
+ goto out;
+ }
+ }
+
rc = kvm_s390_gib_init(GAL_ISC);
if (rc)
goto out;
@@ -517,6 +527,8 @@ out:
void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ kvm_s390_pci_exit();
debug_unregister(kvm_s390_dbf);
debug_unregister(kvm_s390_dbf_uv);
}
@@ -564,6 +576,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
+ case KVM_CAP_S390_MEM_OP_EXTENSION:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -606,6 +619,32 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_PROTECTED:
r = is_prot_virt_host();
break;
+ case KVM_CAP_S390_PROTECTED_DUMP: {
+ u64 pv_cmds_dump[] = {
+ BIT_UVC_CMD_DUMP_INIT,
+ BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
+ BIT_UVC_CMD_DUMP_CPU,
+ BIT_UVC_CMD_DUMP_COMPLETE,
+ };
+ int i;
+
+ r = is_prot_virt_host();
+
+ for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
+ if (!test_bit_inv(pv_cmds_dump[i],
+ (unsigned long *)&uv_info.inst_calls_list)) {
+ r = 0;
+ break;
+ }
+ }
+ break;
+ }
+ case KVM_CAP_S390_ZPCI_OP:
+ r = kvm_s390_pci_interp_allowed();
+ break;
+ case KVM_CAP_S390_CPU_TOPOLOGY:
+ r = test_facility(11);
+ break;
default:
r = 0;
}
@@ -682,7 +721,7 @@ out:
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -817,6 +856,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
icpt_operexc_on_all_vcpus(kvm);
r = 0;
break;
+ case KVM_CAP_S390_CPU_TOPOLOGY:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else if (test_facility(11)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 11);
+ set_kvm_facility(kvm->arch.model.fac_list, 11);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
+ r ? "(not available)" : "(success)");
+ break;
default:
r = -EINVAL;
break;
@@ -936,7 +989,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
kvm_s390_vcpu_block_all(kvm);
@@ -1019,9 +1072,45 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
+{
+ /* Only set the ECB bits after guest requests zPCI interpretation */
+ if (!vcpu->kvm->arch.use_zpci_interp)
+ return;
+
+ vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
+ vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
+}
+
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ lockdep_assert_held(&kvm->lock);
+
+ if (!kvm_s390_pci_interp_allowed())
+ return;
+
+ /*
+ * If host is configured for PCI and the necessary facilities are
+ * available, turn on interpretation for the life of this guest
+ */
+ kvm->arch.use_zpci_interp = 1;
+
+ kvm_s390_vcpu_block_all(kvm);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvm_s390_vcpu_pci_setup(vcpu);
+ kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+ }
+
+ kvm_s390_vcpu_unblock_all(kvm);
+}
+
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
- int cx;
+ unsigned long cx;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(cx, vcpu, kvm)
@@ -1037,13 +1126,13 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
struct kvm_memory_slot *ms;
struct kvm_memslots *slots;
unsigned long ram_pages = 0;
- int slotnr;
+ int bkt;
/* migration mode already enabled */
if (kvm->arch.migration_mode)
return 0;
slots = kvm_memslots(kvm);
- if (!slots || !slots->used_slots)
+ if (!slots || kvm_memslots_empty(slots))
return -EINVAL;
if (!kvm->arch.use_cmma) {
@@ -1051,8 +1140,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
return 0;
}
/* mark all the pages in active slots as dirty */
- for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
- ms = slots->memslots + slotnr;
+ kvm_for_each_memslot(ms, bkt, slots) {
if (!ms->dirty_bitmap)
return -EINVAL;
/*
@@ -1119,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
return 0;
}
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_tod_clock gtod;
@@ -1128,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
return -EINVAL;
- kvm_s390_set_tod_clock(kvm, &gtod);
+ __kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
gtod.epoch_idx, gtod.tod);
@@ -1159,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
sizeof(gtod.tod)))
return -EFAULT;
- kvm_s390_set_tod_clock(kvm, &gtod);
+ __kvm_s390_set_tod_clock(kvm, &gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
return 0;
}
@@ -1171,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
if (attr->flags)
return -EINVAL;
+ mutex_lock(&kvm->lock);
+ /*
+ * For protected guests, the TOD is managed by the ultravisor, so trying
+ * to change it will never bring the expected results.
+ */
+ if (kvm_s390_pv_is_protected(kvm)) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
switch (attr->attr) {
case KVM_S390_VM_TOD_EXT:
ret = kvm_s390_set_tod_ext(kvm, attr);
@@ -1185,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
ret = -ENXIO;
break;
}
+
+out_unlock:
+ mutex_unlock(&kvm->lock);
return ret;
}
@@ -1333,8 +1436,7 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
mutex_unlock(&kvm->lock);
return -EBUSY;
}
- bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
- KVM_S390_VM_CPU_FEAT_NR_BITS);
+ bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
data.feat[0],
@@ -1505,8 +1607,7 @@ static int kvm_s390_get_processor_feat(struct kvm *kvm,
{
struct kvm_s390_vm_cpu_feat data;
- bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
- KVM_S390_VM_CPU_FEAT_NR_BITS);
+ bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
@@ -1521,9 +1622,7 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm,
{
struct kvm_s390_vm_cpu_feat data;
- bitmap_copy((unsigned long *) data.feat,
- kvm_s390_available_cpu_feat,
- KVM_S390_VM_CPU_FEAT_NR_BITS);
+ bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
@@ -1696,6 +1795,57 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
return ret;
}
+/**
+ * kvm_s390_update_topology_change_report - update CPU topology change report
+ * @kvm: guest KVM description
+ * @val: set or clear the MTCR bit
+ *
+ * Updates the Multiprocessor Topology-Change-Report bit to signal
+ * the guest with a topology change.
+ * This is only relevant if the topology facility is present.
+ *
+ * The SCA version, bsca or esca, doesn't matter as offset is the same.
+ */
+static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
+{
+ union sca_utility new, old;
+ struct bsca_block *sca;
+
+ read_lock(&kvm->arch.sca_lock);
+ sca = kvm->arch.sca;
+ do {
+ old = READ_ONCE(sca->utility);
+ new = old;
+ new.mtcr = val;
+ } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+ read_unlock(&kvm->arch.sca_lock);
+}
+
+static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ if (!test_kvm_facility(kvm, 11))
+ return -ENXIO;
+
+ kvm_s390_update_topology_change_report(kvm, !!attr->attr);
+ return 0;
+}
+
+static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ u8 topo;
+
+ if (!test_kvm_facility(kvm, 11))
+ return -ENXIO;
+
+ read_lock(&kvm->arch.sca_lock);
+ topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
+ read_unlock(&kvm->arch.sca_lock);
+
+ return put_user(topo, (u8 __user *)attr->addr);
+}
+
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret;
@@ -1716,6 +1866,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MIGRATION:
ret = kvm_s390_vm_set_migration(kvm, attr);
break;
+ case KVM_S390_VM_CPU_TOPOLOGY:
+ ret = kvm_s390_set_topo_change_indication(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1741,6 +1894,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MIGRATION:
ret = kvm_s390_vm_get_migration(kvm, attr);
break;
+ case KVM_S390_VM_CPU_TOPOLOGY:
+ ret = kvm_s390_get_topo_change_indication(kvm, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -1814,6 +1970,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_MIGRATION:
ret = 0;
break;
+ case KVM_S390_VM_CPU_TOPOLOGY:
+ ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
+ break;
default:
ret = -ENXIO;
break;
@@ -1943,41 +2102,6 @@ out:
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
-/*
- * Similar to gfn_to_memslot, but returns the index of a memslot also when the
- * address falls in a hole. In that case the index of one of the memslots
- * bordering the hole is returned.
- */
-static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
-{
- int start = 0, end = slots->used_slots;
- int slot = atomic_read(&slots->last_used_slot);
- struct kvm_memory_slot *memslots = slots->memslots;
-
- if (gfn >= memslots[slot].base_gfn &&
- gfn < memslots[slot].base_gfn + memslots[slot].npages)
- return slot;
-
- while (start < end) {
- slot = start + (end - start) / 2;
-
- if (gfn >= memslots[slot].base_gfn)
- end = slot;
- else
- start = slot + 1;
- }
-
- if (start >= slots->used_slots)
- return slots->used_slots - 1;
-
- if (gfn >= memslots[start].base_gfn &&
- gfn < memslots[start].base_gfn + memslots[start].npages) {
- atomic_set(&slots->last_used_slot, start);
- }
-
- return start;
-}
-
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
u8 *res, unsigned long bufsize)
{
@@ -2001,27 +2125,32 @@ static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
return 0;
}
+static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
+ gfn_t gfn)
+{
+ return ____gfn_to_memslot(slots, gfn, true);
+}
+
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
unsigned long cur_gfn)
{
- int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
- struct kvm_memory_slot *ms = slots->memslots + slotidx;
+ struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
unsigned long ofs = cur_gfn - ms->base_gfn;
+ struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
if (ms->base_gfn + ms->npages <= cur_gfn) {
- slotidx--;
+ mnode = rb_next(mnode);
/* If we are above the highest slot, wrap around */
- if (slotidx < 0)
- slotidx = slots->used_slots - 1;
+ if (!mnode)
+ mnode = rb_first(&slots->gfn_tree);
- ms = slots->memslots + slotidx;
+ ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
ofs = 0;
}
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
- while ((slotidx > 0) && (ofs >= ms->npages)) {
- slotidx--;
- ms = slots->memslots + slotidx;
- ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
+ ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+ ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
}
return ms->base_gfn + ofs;
}
@@ -2033,7 +2162,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
- if (unlikely(!slots->used_slots))
+ if (unlikely(kvm_memslots_empty(slots)))
return 0;
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
@@ -2043,7 +2172,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
if (!ms)
return 0;
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
- mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+ mem_end = kvm_s390_get_gfn_end(slots);
while (args->count < bufsize) {
hva = gfn_to_hva(kvm, cur_gfn);
@@ -2201,12 +2330,25 @@ out:
return r;
}
-static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+/**
+ * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
+ * non protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Does not stop in case of error, tries to convert as many
+ * CPUs as possible. In case of error, the RC and RRC of the last error are
+ * returned.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
struct kvm_vcpu *vcpu;
- u16 rc, rrc;
+ unsigned long i;
+ u16 _rc, _rrc;
int ret = 0;
- int i;
/*
* We ignore failures and try to destroy as many CPUs as possible.
@@ -2218,23 +2360,42 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_lock(&vcpu->mutex);
- if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
- *rcp = rc;
- *rrcp = rrc;
+ if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
+ *rc = _rc;
+ *rrc = _rrc;
ret = -EIO;
}
mutex_unlock(&vcpu->mutex);
}
+ /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
+ if (use_gisa)
+ kvm_s390_gisa_enable(kvm);
return ret;
}
+/**
+ * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
+ * to protected.
+ * @kvm: the VM whose protected vCPUs are to be converted
+ * @rc: return value for the RC field of the UVC (in case of error)
+ * @rrc: return value for the RRC field of the UVC (in case of error)
+ *
+ * Tries to undo the conversion in case of error.
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
- int i, r = 0;
+ unsigned long i;
+ int r = 0;
u16 dummy;
struct kvm_vcpu *vcpu;
+ /* Disable the GISA if the ultravisor does not support AIV. */
+ if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+ kvm_s390_gisa_disable(kvm);
+
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_lock(&vcpu->mutex);
r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
@@ -2247,6 +2408,115 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
return r;
}
+/*
+ * Here we provide user space with a direct interface to query UV
+ * related data like UV maxima and available features as well as
+ * feature specific data.
+ *
+ * To facilitate future extension of the data structures we'll try to
+ * write data up to the maximum requested length.
+ */
+static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
+{
+ ssize_t len_min;
+
+ switch (info->header.id) {
+ case KVM_PV_INFO_VM: {
+ len_min = sizeof(info->header) + sizeof(info->vm);
+
+ if (info->header.len_max < len_min)
+ return -EINVAL;
+
+ memcpy(info->vm.inst_calls_list,
+ uv_info.inst_calls_list,
+ sizeof(uv_info.inst_calls_list));
+
+ /* It's max cpuid not max cpus, so it's off by one */
+ info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
+ info->vm.max_guests = uv_info.max_num_sec_conf;
+ info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
+ info->vm.feature_indication = uv_info.uv_feature_indications;
+
+ return len_min;
+ }
+ case KVM_PV_INFO_DUMP: {
+ len_min = sizeof(info->header) + sizeof(info->dump);
+
+ if (info->header.len_max < len_min)
+ return -EINVAL;
+
+ info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
+ info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
+ info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
+ return len_min;
+ }
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
+ struct kvm_s390_pv_dmp dmp)
+{
+ int r = -EINVAL;
+ void __user *result_buff = (void __user *)dmp.buff_addr;
+
+ switch (dmp.subcmd) {
+ case KVM_PV_DUMP_INIT: {
+ if (kvm->arch.pv.dumping)
+ break;
+
+ /*
+ * Block SIE entry as concurrent dump UVCs could lead
+ * to validities.
+ */
+ kvm_s390_vcpu_block_all(kvm);
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ if (!r) {
+ kvm->arch.pv.dumping = true;
+ } else {
+ kvm_s390_vcpu_unblock_all(kvm);
+ r = -EINVAL;
+ }
+ break;
+ }
+ case KVM_PV_DUMP_CONFIG_STOR_STATE: {
+ if (!kvm->arch.pv.dumping)
+ break;
+
+ /*
+ * gaddr is an output parameter since we might stop
+ * early. As dmp will be copied back in our caller, we
+ * don't need to do it ourselves.
+ */
+ r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_DUMP_COMPLETE: {
+ if (!kvm->arch.pv.dumping)
+ break;
+
+ r = -EINVAL;
+ if (dmp.buff_len < uv_info.conf_dump_finalize_len)
+ break;
+
+ r = kvm_s390_pv_dump_complete(kvm, result_buff,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
int r = 0;
@@ -2383,12 +2653,160 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
cmd->rc, cmd->rrc);
break;
}
+ case KVM_PV_INFO: {
+ struct kvm_s390_pv_info info = {};
+ ssize_t data_len;
+
+ /*
+ * No need to check the VM protection here.
+ *
+ * Maybe user space wants to query some of the data
+ * when the VM is still unprotected. If we see the
+ * need to fence a new data command we can still
+ * return an error in the info handler.
+ */
+
+ r = -EFAULT;
+ if (copy_from_user(&info, argp, sizeof(info.header)))
+ break;
+
+ r = -EINVAL;
+ if (info.header.len_max < sizeof(info.header))
+ break;
+
+ data_len = kvm_s390_handle_pv_info(&info);
+ if (data_len < 0) {
+ r = data_len;
+ break;
+ }
+ /*
+ * If a data command struct is extended (multiple
+ * times) this can be used to determine how much of it
+ * is valid.
+ */
+ info.header.len_written = data_len;
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &info, data_len))
+ break;
+
+ r = 0;
+ break;
+ }
+ case KVM_PV_DUMP: {
+ struct kvm_s390_pv_dmp dmp;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&dmp, argp, sizeof(dmp)))
+ break;
+
+ r = kvm_s390_pv_dmp(kvm, cmd, dmp);
+ if (r)
+ break;
+
+ if (copy_to_user(argp, &dmp, sizeof(dmp))) {
+ r = -EFAULT;
+ break;
+ }
+
+ break;
+ }
default:
r = -ENOTTY;
}
return r;
}
+static bool access_key_invalid(u8 access_key)
+{
+ return access_key > 0xf;
+}
+
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ u64 supported_flags;
+ void *tmpbuf = NULL;
+ int r, srcu_idx;
+
+ supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
+ | KVM_S390_MEMOP_F_CHECK_ONLY;
+ if (mop->flags & ~supported_flags || !mop->size)
+ return -EINVAL;
+ if (mop->size > MEM_OP_MAX_SIZE)
+ return -E2BIG;
+ /*
+ * This is technically a heuristic only, if the kvm->lock is not
+ * taken, it is not guaranteed that the vm is/remains non-protected.
+ * This is ok from a kernel perspective, wrongdoing is detected
+ * on the access, -EFAULT is returned and the vm may crash the
+ * next time it accesses the memory in question.
+ * There is no sane usecase to do switching and a memop on two
+ * different CPUs at the same time.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+ if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
+ if (access_key_invalid(mop->key))
+ return -EINVAL;
+ } else {
+ mop->key = 0;
+ }
+ if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+ tmpbuf = vmalloc(mop->size);
+ if (!tmpbuf)
+ return -ENOMEM;
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+ r = PGM_ADDRESSING;
+ goto out_unlock;
+ }
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_ABSOLUTE_READ: {
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
+ } else {
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_FETCH, mop->key);
+ if (r == 0) {
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ }
+ }
+ break;
+ }
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
+ } else {
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ break;
+ }
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_STORE, mop->key);
+ }
+ break;
+ }
+ default:
+ r = -EINVAL;
+ }
+
+out_unlock:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ vfree(tmpbuf);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2513,6 +2931,28 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
break;
}
+ case KVM_S390_MEM_OP: {
+ struct kvm_s390_mem_op mem_op;
+
+ if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+ r = kvm_s390_vm_mem_op(kvm, &mem_op);
+ else
+ r = -EFAULT;
+ break;
+ }
+ case KVM_S390_ZPCI_OP: {
+ struct kvm_s390_zpci_op args;
+
+ r = -EINVAL;
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ break;
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ r = kvm_s390_pci_zpci_op(kvm, &args);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2674,6 +3114,14 @@ static void sca_dispose(struct kvm *kvm)
kvm->arch.sca = NULL;
}
+void kvm_arch_free_vm(struct kvm *kvm)
+{
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
+ kvm_s390_pci_clear_list(kvm);
+
+ __kvm_arch_free_vm(kvm);
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
@@ -2756,6 +3204,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_crypto_init(kvm);
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
+ mutex_lock(&kvm->lock);
+ kvm_s390_pci_init_list(kvm);
+ kvm_s390_vcpu_pci_enable_interp(kvm);
+ mutex_unlock(&kvm->lock);
+ }
+
mutex_init(&kvm->arch.float_int.ais_lock);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -2809,6 +3264,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm))
sca_del_vcpu(vcpu);
+ kvm_s390_update_topology_change_report(vcpu->kvm, 1);
if (kvm_is_ucontrol(vcpu->kvm))
gmap_remove(vcpu->arch.gmap);
@@ -2821,27 +3277,11 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
free_page((unsigned long)(vcpu->arch.sie_block));
}
-static void kvm_free_vcpus(struct kvm *kvm)
-{
- unsigned int i;
- struct kvm_vcpu *vcpu;
-
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vcpu_destroy(vcpu);
-
- mutex_lock(&kvm->lock);
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
-
- atomic_set(&kvm->online_vcpus, 0);
- mutex_unlock(&kvm->lock);
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
u16 rc, rrc;
- kvm_free_vcpus(kvm);
+ kvm_destroy_vcpus(kvm);
sca_dispose(kvm);
kvm_s390_gisa_destroy(kvm);
/*
@@ -2852,6 +3292,15 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
*/
if (kvm_s390_pv_get_handle(kvm))
kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ /*
+ * Remove the mmu notifier only when the whole KVM VM is torn down,
+ * and only if one was registered to begin with. If the VM is
+ * currently not protected, but has been previously been protected,
+ * then it's possible that the notifier is still registered.
+ */
+ if (kvm->arch.pv.mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
+
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
@@ -2945,7 +3394,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
struct bsca_block *old_sca = kvm->arch.sca;
struct esca_block *new_sca;
struct kvm_vcpu *vcpu;
- unsigned int vcpu_idx;
+ unsigned long vcpu_idx;
u32 scaol, scaoh;
if (kvm->arch.use_esca)
@@ -2995,9 +3444,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
if (!sclp.has_esca || !sclp.has_64bscao)
return false;
- mutex_lock(&kvm->lock);
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
- mutex_unlock(&kvm->lock);
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
@@ -3220,6 +3667,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= ECB_SRSI;
+ if (test_kvm_facility(vcpu->kvm, 11))
+ vcpu->arch.sie_block->ecb |= ECB_PTF;
if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= ECB_TE;
if (!kvm_is_ucontrol(vcpu->kvm))
@@ -3272,6 +3721,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_crypto_setup(vcpu);
+ kvm_s390_vcpu_pci_setup(vcpu);
+
mutex_lock(&vcpu->kvm->lock);
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
@@ -3309,9 +3760,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
spin_lock_init(&vcpu->arch.local_int.lock);
- vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
- if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
- vcpu->arch.sie_block->gd |= GISA_FORMAT1;
+ vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
seqcount_init(&vcpu->arch.cputm_seqcount);
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -3353,6 +3802,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
rc = kvm_s390_vcpu_setup(vcpu);
if (rc)
goto out_ucontrol_uninit;
+
+ kvm_s390_update_topology_change_report(vcpu->kvm, 1);
return 0;
out_ucontrol_uninit:
@@ -3417,7 +3868,7 @@ void exit_sie(struct kvm_vcpu *vcpu)
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
- kvm_make_request(req, vcpu);
+ __kvm_make_request(req, vcpu);
kvm_s390_vcpu_request(vcpu);
}
@@ -3427,7 +3878,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
struct kvm *kvm = gmap->private;
struct kvm_vcpu *vcpu;
unsigned long prefix;
- int i;
+ unsigned long i;
if (gmap_is_shadow(gmap))
return;
@@ -3440,7 +3891,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
start, end);
- kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
+ kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
}
}
}
@@ -3449,7 +3900,7 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
/* do not poll with more than halt_poll_max_steal percent of steal time */
if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
- halt_poll_max_steal) {
+ READ_ONCE(halt_poll_max_steal)) {
vcpu->stat.halt_no_poll_steal++;
return true;
}
@@ -3842,19 +4293,19 @@ retry:
if (!kvm_request_pending(vcpu))
return 0;
/*
- * We use MMU_RELOAD just to re-arm the ipte notifier for the
+ * If the guest prefix changed, re-arm the ipte notifier for the
* guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
* This ensures that the ipte instruction for this request has
* already finished. We might race against a second unmapper that
* wants to set the blocking bit. Lets just retry the request loop.
*/
- if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
rc = gmap_mprotect_notify(vcpu->arch.gmap,
kvm_s390_get_prefix(vcpu),
PAGE_SIZE * 2, PROT_WRITE);
if (rc) {
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
}
goto retry;
@@ -3907,22 +4358,18 @@ retry:
goto retry;
}
- /* nothing to do, just clear the request */
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
return 0;
}
-void kvm_s390_set_tod_clock(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod)
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
struct kvm_vcpu *vcpu;
union tod_clock clk;
- int i;
+ unsigned long i;
- mutex_lock(&kvm->lock);
preempt_disable();
store_tod_clock_ext(&clk);
@@ -3943,7 +4390,15 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
kvm_s390_vcpu_unblock_all(kvm);
preempt_enable();
+}
+
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
+{
+ if (!mutex_trylock(&kvm->lock))
+ return 0;
+ __kvm_s390_set_tod_clock(kvm, gtod);
mutex_unlock(&kvm->lock);
+ return 1;
}
/**
@@ -4179,14 +4634,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
* We try to hold kvm->srcu during most of vcpu_run (except when run-
* ning the guest), so that memslots (and other stuff) are protected
*/
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
do {
rc = vcpu_pre_run(vcpu);
if (rc)
break;
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
* guest_enter and guest_exit should be no uaccess.
@@ -4223,12 +4678,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
local_irq_enable();
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return rc;
}
@@ -4410,6 +4865,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
int rc;
+ /*
+ * Running a VM while dumping always has the potential to
+ * produce inconsistent dump data. But for PV vcpus a SIE
+ * entry while dumping could also lead to a fatal validity
+ * intercept which we absolutely want to avoid.
+ */
+ if (vcpu->kvm->arch.pv.dumping)
+ return -EINVAL;
+
if (kvm_run->immediate_exit)
return -EINTR;
@@ -4552,7 +5016,7 @@ static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
- unsigned int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -4590,7 +5054,7 @@ int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
}
for (i = 0; i < online_vcpus; i++) {
- if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+ if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
started_vcpus++;
}
@@ -4645,16 +5109,23 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
}
}
- /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
+ /*
+ * Set the VCPU to STOPPED and THEN clear the interrupt flag,
+ * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
+ * have been fully processed. This will ensure that the VCPU
+ * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
+ */
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
kvm_s390_clear_stop_irq(vcpu);
- kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
- if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+ struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
+
+ if (!is_vcpu_stopped(tmp)) {
started_vcpus++;
- started_vcpu = vcpu->kvm->vcpus[i];
+ started_vcpu = tmp;
}
}
@@ -4694,8 +5165,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
-static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
- struct kvm_s390_mem_op *mop)
+static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
int r = 0;
@@ -4706,6 +5177,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
return -EINVAL;
if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
return -E2BIG;
+ if (!kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
switch (mop->op) {
case KVM_S390_MEMOP_SIDA_READ:
@@ -4722,24 +5195,29 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
}
return r;
}
-static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
- struct kvm_s390_mem_op *mop)
+
+static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
- | KVM_S390_MEMOP_F_CHECK_ONLY;
+ | KVM_S390_MEMOP_F_CHECK_ONLY
+ | KVM_S390_MEMOP_F_SKEY_PROTECTION;
if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
return -EINVAL;
-
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
-
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EINVAL;
-
+ if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
+ if (access_key_invalid(mop->key))
+ return -EINVAL;
+ } else {
+ mop->key = 0;
+ }
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
@@ -4749,11 +5227,12 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar,
- mop->size, GACC_FETCH);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+ GACC_FETCH, mop->key);
break;
}
- r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
+ mop->size, mop->key);
if (r == 0) {
if (copy_to_user(uaddr, tmpbuf, mop->size))
r = -EFAULT;
@@ -4761,15 +5240,16 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar,
- mop->size, GACC_STORE);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+ GACC_STORE, mop->key);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
r = -EFAULT;
break;
}
- r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
+ mop->size, mop->key);
break;
}
@@ -4780,8 +5260,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
-static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
- struct kvm_s390_mem_op *mop)
+static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
{
int r, srcu_idx;
@@ -4790,12 +5270,12 @@ static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
case KVM_S390_MEMOP_LOGICAL_WRITE:
- r = kvm_s390_guest_mem_op(vcpu, mop);
+ r = kvm_s390_vcpu_mem_op(vcpu, mop);
break;
case KVM_S390_MEMOP_SIDA_READ:
case KVM_S390_MEMOP_SIDA_WRITE:
/* we are locked against sida going away by the vcpu->mutex */
- r = kvm_s390_guest_sida_op(vcpu, mop);
+ r = kvm_s390_vcpu_sida_op(vcpu, mop);
break;
default:
r = -EINVAL;
@@ -4833,6 +5313,48 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
return -ENOIOCTLCMD;
}
+static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
+ struct kvm_pv_cmd *cmd)
+{
+ struct kvm_s390_pv_dmp dmp;
+ void *data;
+ int ret;
+
+ /* Dump initialization is a prerequisite */
+ if (!vcpu->kvm->arch.pv.dumping)
+ return -EINVAL;
+
+ if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
+ return -EFAULT;
+
+ /* We only handle this subcmd right now */
+ if (dmp.subcmd != KVM_PV_DUMP_CPU)
+ return -EINVAL;
+
+ /* CPU dump length is the same as create cpu storage donation. */
+ if (dmp.buff_len != uv_info.guest_cpu_stor_len)
+ return -EINVAL;
+
+ data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
+
+ VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
+ vcpu->vcpu_id, cmd->rc, cmd->rrc);
+
+ if (ret)
+ ret = -EINVAL;
+
+ /* On success copy over the dump data */
+ if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
+ ret = -EFAULT;
+
+ kvfree(data);
+ return ret;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4958,7 +5480,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
+ r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
@@ -4997,6 +5519,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
irq_state.len);
break;
}
+ case KVM_S390_PV_CPU_COMMAND: {
+ struct kvm_pv_cmd cmd;
+
+ r = -EINVAL;
+ if (!is_prot_virt_host())
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&cmd, argp, sizeof(cmd)))
+ break;
+
+ r = -EINVAL;
+ if (cmd.flags)
+ break;
+
+ /* We only handle this cmd right now */
+ if (cmd.cmd != KVM_PV_DUMP)
+ break;
+
+ r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
+
+ /* Always copy over UV rc / rrc data */
+ if (copy_to_user((__u8 __user *)argp, &cmd.rc,
+ sizeof(cmd.rc) + sizeof(cmd.rrc)))
+ r = -EFAULT;
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -5020,32 +5569,38 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ gpa_t size;
+
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+
+ if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
+ return 0;
+
/* A few sanity checks. We can have memory slots which have to be
located/ended at a segment boundary (1MB). The memory in userland is
ok to be fragmented into various different vmas. It is okay to mmap()
and munmap() stuff in this slot after doing this call at any time */
- if (mem->userspace_addr & 0xffffful)
+ if (new->userspace_addr & 0xffffful)
return -EINVAL;
- if (mem->memory_size & 0xffffful)
+ size = new->npages * PAGE_SIZE;
+ if (size & 0xffffful)
return -EINVAL;
- if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
+ if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
return -EINVAL;
- /* When we are protected, we should not change the memory slots */
- if (kvm_s390_pv_get_handle(kvm))
- return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -5064,8 +5619,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
break;
fallthrough;
case KVM_MR_CREATE:
- rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
- mem->guest_phys_addr, mem->memory_size);
+ rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
+ new->base_gfn * PAGE_SIZE,
+ new->npages * PAGE_SIZE);
break;
case KVM_MR_FLAGS_ONLY:
break;
@@ -5084,11 +5640,6 @@ static inline unsigned long nonhyp_mask(int i)
return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
-void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
-{
- vcpu->valid_wakeup = false;
-}
-
static int __init kvm_s390_init(void)
{
int i;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c07a050d757d..4755492dfabc 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -105,7 +105,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
prefix);
vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
}
static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
@@ -217,6 +217,29 @@ static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
kvm->arch.user_cpu_state_ctrl = 1;
}
+/* get the end gfn of the last (highest gfn) memslot */
+static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
+{
+ struct rb_node *node;
+ struct kvm_memory_slot *ms;
+
+ if (WARN_ON(kvm_memslots_empty(slots)))
+ return 0;
+
+ node = rb_last(&slots->gfn_tree);
+ ms = container_of(node, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+ return ms->base_gfn + ms->npages;
+}
+
+static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
+{
+ u32 gd = (u32)(u64)kvm->arch.gisa_int.origin;
+
+ if (gd && sclp.has_gisaf)
+ gd |= GISA_FORMAT1;
+ return gd;
+}
+
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
@@ -227,6 +250,11 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
unsigned long tweak, u16 *rc, u16 *rrc);
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+ u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+ u16 *rc, u16 *rrc);
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
{
@@ -335,8 +363,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm,
- const struct kvm_s390_vm_tod_clock *gtod);
+int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -351,13 +378,14 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
+int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
WARN_ON(!mutex_is_locked(&kvm->lock));
@@ -367,7 +395,7 @@ static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -427,6 +455,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm);
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
extern struct kvm_device_ops kvm_flic_ops;
int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
void __user *buf, int len);
@@ -435,6 +464,8 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
void kvm_s390_gisa_init(struct kvm *kvm);
void kvm_s390_gisa_clear(struct kvm *kvm);
void kvm_s390_gisa_destroy(struct kvm *kvm);
+void kvm_s390_gisa_disable(struct kvm *kvm);
+void kvm_s390_gisa_enable(struct kvm *kvm);
int kvm_s390_gib_init(u8 nisc);
void kvm_s390_gib_destroy(void);
@@ -482,6 +513,16 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
/**
+ * kvm_s390_vcpu_pci_enable_interp
+ *
+ * Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
+ * interpretation as well as adapter interruption forwarding.
+ *
+ * @kvm: the KVM guest
+ */
+void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
+
+/**
* diag9c_forwarding_hz
*
* Set the maximum number of diag9c forwarding per second
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
new file mode 100644
index 000000000000..ded1af2ddae9
--- /dev/null
+++ b/arch/s390/kvm/pci.c
@@ -0,0 +1,702 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <asm/pci.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_io.h>
+#include <asm/sclp.h>
+#include "pci.h"
+#include "kvm-s390.h"
+
+struct zpci_aift *aift;
+
+static inline int __set_irq_noiib(u16 ctl, u8 isc)
+{
+ union zpci_sic_iib iib = {{0}};
+
+ return zpci_set_irq_ctrl(ctl, isc, &iib);
+}
+
+void kvm_s390_pci_aen_exit(void)
+{
+ unsigned long flags;
+ struct kvm_zdev **gait_kzdev;
+
+ lockdep_assert_held(&aift->aift_lock);
+
+ /*
+ * Contents of the aipb remain registered for the life of the host
+ * kernel, the information preserved in zpci_aipb and zpci_aif_sbv
+ * in case we insert the KVM module again later. Clear the AIFT
+ * information and free anything not registered with underlying
+ * firmware.
+ */
+ spin_lock_irqsave(&aift->gait_lock, flags);
+ gait_kzdev = aift->kzdev;
+ aift->gait = NULL;
+ aift->sbv = NULL;
+ aift->kzdev = NULL;
+ spin_unlock_irqrestore(&aift->gait_lock, flags);
+
+ kfree(gait_kzdev);
+}
+
+static int zpci_setup_aipb(u8 nisc)
+{
+ struct page *page;
+ int size, rc;
+
+ zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
+ if (!zpci_aipb)
+ return -ENOMEM;
+
+ aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
+ if (!aift->sbv) {
+ rc = -ENOMEM;
+ goto free_aipb;
+ }
+ zpci_aif_sbv = aift->sbv;
+ size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
+ sizeof(struct zpci_gaite)));
+ page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
+ if (!page) {
+ rc = -ENOMEM;
+ goto free_sbv;
+ }
+ aift->gait = (struct zpci_gaite *)page_to_virt(page);
+
+ zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
+ zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
+ zpci_aipb->aipb.afi = nisc;
+ zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;
+
+ /* Setup Adapter Event Notification Interpretation */
+ if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
+ rc = -EIO;
+ goto free_gait;
+ }
+
+ return 0;
+
+free_gait:
+ free_pages((unsigned long)aift->gait, size);
+free_sbv:
+ airq_iv_release(aift->sbv);
+ zpci_aif_sbv = NULL;
+free_aipb:
+ kfree(zpci_aipb);
+ zpci_aipb = NULL;
+
+ return rc;
+}
+
+static int zpci_reset_aipb(u8 nisc)
+{
+ /*
+ * AEN registration can only happen once per system boot. If
+ * an aipb already exists then AEN was already registered and
+ * we can re-use the aipb contents. This can only happen if
+ * the KVM module was removed and re-inserted. However, we must
+ * ensure that the same forwarding ISC is used as this is assigned
+ * during KVM module load.
+ */
+ if (zpci_aipb->aipb.afi != nisc)
+ return -EINVAL;
+
+ aift->sbv = zpci_aif_sbv;
+ aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
+
+ return 0;
+}
+
+int kvm_s390_pci_aen_init(u8 nisc)
+{
+ int rc = 0;
+
+ /* If already enabled for AEN, bail out now */
+ if (aift->gait || aift->sbv)
+ return -EPERM;
+
+ mutex_lock(&aift->aift_lock);
+ aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
+ GFP_KERNEL);
+ if (!aift->kzdev) {
+ rc = -ENOMEM;
+ goto unlock;
+ }
+
+ if (!zpci_aipb)
+ rc = zpci_setup_aipb(nisc);
+ else
+ rc = zpci_reset_aipb(nisc);
+ if (rc)
+ goto free_zdev;
+
+ /* Enable floating IRQs */
+ if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
+ rc = -EIO;
+ kvm_s390_pci_aen_exit();
+ }
+
+ goto unlock;
+
+free_zdev:
+ kfree(aift->kzdev);
+unlock:
+ mutex_unlock(&aift->aift_lock);
+ return rc;
+}
+
+/* Modify PCI: Register floating adapter interruption forwarding */
+static int kvm_zpci_set_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib fib = {};
+ u8 status;
+
+ fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
+ fib.fmt0.sum = 1; /* enable summary notifications */
+ fib.fmt0.noi = airq_iv_end(zdev->aibv);
+ fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
+ fib.fmt0.aibvo = 0;
+ fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib.fmt0.aisbo = zdev->aisb & 63;
+ fib.gd = zdev->gisa;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister floating adapter interruption forwarding */
+static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+ struct zpci_fib fib = {};
+ u8 cc, status;
+
+ fib.gd = zdev->gisa;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+static inline void unaccount_mem(unsigned long nr_pages)
+{
+ struct user_struct *user = get_uid(current_user());
+
+ if (user)
+ atomic_long_sub(nr_pages, &user->locked_vm);
+ if (current->mm)
+ atomic64_sub(nr_pages, &current->mm->pinned_vm);
+}
+
+static inline int account_mem(unsigned long nr_pages)
+{
+ struct user_struct *user = get_uid(current_user());
+ unsigned long page_limit, cur_pages, new_pages;
+
+ page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ do {
+ cur_pages = atomic_long_read(&user->locked_vm);
+ new_pages = cur_pages + nr_pages;
+ if (new_pages > page_limit)
+ return -ENOMEM;
+ } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
+ new_pages) != cur_pages);
+
+ atomic64_add(nr_pages, &current->mm->pinned_vm);
+
+ return 0;
+}
+
+static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
+ bool assist)
+{
+ struct page *pages[1], *aibv_page, *aisb_page = NULL;
+ unsigned int msi_vecs, idx;
+ struct zpci_gaite *gaite;
+ unsigned long hva, bit;
+ struct kvm *kvm;
+ phys_addr_t gaddr;
+ int rc = 0, gisc, npages, pcount = 0;
+
+ /*
+ * Interrupt forwarding is only applicable if the device is already
+ * enabled for interpretation
+ */
+ if (zdev->gisa == 0)
+ return -EINVAL;
+
+ kvm = zdev->kzdev->kvm;
+ msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
+
+ /* Get the associated forwarding ISC - if invalid, return the error */
+ gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
+ if (gisc < 0)
+ return gisc;
+
+ /* Replace AIBV address */
+ idx = srcu_read_lock(&kvm->srcu);
+ hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
+ npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
+ srcu_read_unlock(&kvm->srcu, idx);
+ if (npages < 1) {
+ rc = -EIO;
+ goto out;
+ }
+ aibv_page = pages[0];
+ pcount++;
+ gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
+ fib->fmt0.aibv = gaddr;
+
+ /* Pin the guest AISB if one was specified */
+ if (fib->fmt0.sum == 1) {
+ idx = srcu_read_lock(&kvm->srcu);
+ hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
+ npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
+ pages);
+ srcu_read_unlock(&kvm->srcu, idx);
+ if (npages < 1) {
+ rc = -EIO;
+ goto unpin1;
+ }
+ aisb_page = pages[0];
+ pcount++;
+ }
+
+ /* Account for pinned pages, roll back on failure */
+ if (account_mem(pcount))
+ goto unpin2;
+
+ /* AISB must be allocated before we can fill in GAITE */
+ mutex_lock(&aift->aift_lock);
+ bit = airq_iv_alloc_bit(aift->sbv);
+ if (bit == -1UL)
+ goto unlock;
+ zdev->aisb = bit; /* store the summary bit number */
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
+ AIRQ_IV_BITLOCK |
+ AIRQ_IV_GUESTVEC,
+ phys_to_virt(fib->fmt0.aibv));
+
+ spin_lock_irq(&aift->gait_lock);
+ gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+ sizeof(struct zpci_gaite));
+
+ /* If assist not requested, host will get all alerts */
+ if (assist)
+ gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+ else
+ gaite->gisa = 0;
+
+ gaite->gisc = fib->fmt0.isc;
+ gaite->count++;
+ gaite->aisbo = fib->fmt0.aisbo;
+ gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
+ ~PAGE_MASK));
+ aift->kzdev[zdev->aisb] = zdev->kzdev;
+ spin_unlock_irq(&aift->gait_lock);
+
+ /* Update guest FIB for re-issue */
+ fib->fmt0.aisbo = zdev->aisb & 63;
+ fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib->fmt0.isc = gisc;
+
+ /* Save some guest fib values in the host for later use */
+ zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
+ zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
+ mutex_unlock(&aift->aift_lock);
+
+ /* Issue the clp to setup the irq now */
+ rc = kvm_zpci_set_airq(zdev);
+ return rc;
+
+unlock:
+ mutex_unlock(&aift->aift_lock);
+unpin2:
+ if (fib->fmt0.sum == 1)
+ unpin_user_page(aisb_page);
+unpin1:
+ unpin_user_page(aibv_page);
+out:
+ return rc;
+}
+
+static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
+{
+ struct kvm_zdev *kzdev = zdev->kzdev;
+ struct zpci_gaite *gaite;
+ struct page *vpage = NULL, *spage = NULL;
+ int rc, pcount = 0;
+ u8 isc;
+
+ if (zdev->gisa == 0)
+ return -EINVAL;
+
+ mutex_lock(&aift->aift_lock);
+
+ /*
+ * If the clear fails due to an error, leave now unless we know this
+ * device is about to go away (force) -- In that case clear the GAITE
+ * regardless.
+ */
+ rc = kvm_zpci_clear_airq(zdev);
+ if (rc && !force)
+ goto out;
+
+ if (zdev->kzdev->fib.fmt0.aibv == 0)
+ goto out;
+ spin_lock_irq(&aift->gait_lock);
+ gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
+ sizeof(struct zpci_gaite));
+ isc = gaite->gisc;
+ gaite->count--;
+ if (gaite->count == 0) {
+ /* Release guest AIBV and AISB */
+ vpage = phys_to_page(kzdev->fib.fmt0.aibv);
+ if (gaite->aisb != 0)
+ spage = phys_to_page(gaite->aisb);
+ /* Clear the GAIT entry */
+ gaite->aisb = 0;
+ gaite->gisc = 0;
+ gaite->aisbo = 0;
+ gaite->gisa = 0;
+ aift->kzdev[zdev->aisb] = NULL;
+ /* Clear zdev info */
+ airq_iv_free_bit(aift->sbv, zdev->aisb);
+ airq_iv_release(zdev->aibv);
+ zdev->aisb = 0;
+ zdev->aibv = NULL;
+ }
+ spin_unlock_irq(&aift->gait_lock);
+ kvm_s390_gisc_unregister(kzdev->kvm, isc);
+ kzdev->fib.fmt0.isc = 0;
+ kzdev->fib.fmt0.aibv = 0;
+
+ if (vpage) {
+ unpin_user_page(vpage);
+ pcount++;
+ }
+ if (spage) {
+ unpin_user_page(spage);
+ pcount++;
+ }
+ if (pcount > 0)
+ unaccount_mem(pcount);
+out:
+ mutex_unlock(&aift->aift_lock);
+
+ return rc;
+}
+
+static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
+{
+ struct kvm_zdev *kzdev;
+
+ kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
+ if (!kzdev)
+ return -ENOMEM;
+
+ kzdev->zdev = zdev;
+ zdev->kzdev = kzdev;
+
+ return 0;
+}
+
+static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
+{
+ struct kvm_zdev *kzdev;
+
+ kzdev = zdev->kzdev;
+ WARN_ON(kzdev->zdev != zdev);
+ zdev->kzdev = NULL;
+ kfree(kzdev);
+}
+
+
+/*
+ * Register device with the specified KVM. If interpetation facilities are
+ * available, enable them and let userspace indicate whether or not they will
+ * be used (specify SHM bit to disable).
+ */
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
+{
+ struct zpci_dev *zdev = opaque;
+ int rc;
+
+ if (!zdev)
+ return -EINVAL;
+
+ mutex_lock(&zdev->kzdev_lock);
+
+ if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
+ mutex_unlock(&zdev->kzdev_lock);
+ return -EINVAL;
+ }
+
+ kvm_get_kvm(kvm);
+
+ mutex_lock(&kvm->lock);
+
+ rc = kvm_s390_pci_dev_open(zdev);
+ if (rc)
+ goto err;
+
+ /*
+ * If interpretation facilities aren't available, add the device to
+ * the kzdev list but don't enable for interpretation.
+ */
+ if (!kvm_s390_pci_interp_allowed())
+ goto out;
+
+ /*
+ * If this is the first request to use an interpreted device, make the
+ * necessary vcpu changes
+ */
+ if (!kvm->arch.use_zpci_interp)
+ kvm_s390_vcpu_pci_enable_interp(kvm);
+
+ if (zdev_enabled(zdev)) {
+ rc = zpci_disable_device(zdev);
+ if (rc)
+ goto err;
+ }
+
+ /*
+ * Store information about the identity of the kvm guest allowed to
+ * access this device via interpretation to be used by host CLP
+ */
+ zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ goto clear_gisa;
+
+ /* Re-register the IOMMU that was already created */
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table));
+ if (rc)
+ goto clear_gisa;
+
+out:
+ zdev->kzdev->kvm = kvm;
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ return 0;
+
+clear_gisa:
+ zdev->gisa = 0;
+err:
+ if (zdev->kzdev)
+ kvm_s390_pci_dev_release(zdev);
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ kvm_put_kvm(kvm);
+ return rc;
+}
+
+static void kvm_s390_pci_unregister_kvm(void *opaque)
+{
+ struct zpci_dev *zdev = opaque;
+ struct kvm *kvm;
+
+ if (!zdev)
+ return;
+
+ mutex_lock(&zdev->kzdev_lock);
+
+ if (WARN_ON(!zdev->kzdev)) {
+ mutex_unlock(&zdev->kzdev_lock);
+ return;
+ }
+
+ kvm = zdev->kzdev->kvm;
+ mutex_lock(&kvm->lock);
+
+ /*
+ * A 0 gisa means interpretation was never enabled, just remove the
+ * device from the list.
+ */
+ if (zdev->gisa == 0)
+ goto out;
+
+ /* Forwarding must be turned off before interpretation */
+ if (zdev->kzdev->fib.fmt0.aibv != 0)
+ kvm_s390_pci_aif_disable(zdev, true);
+
+ /* Remove the host CLP guest designation */
+ zdev->gisa = 0;
+
+ if (zdev_enabled(zdev)) {
+ if (zpci_disable_device(zdev))
+ goto out;
+ }
+
+ if (zpci_enable_device(zdev))
+ goto out;
+
+ /* Re-register the IOMMU that was already created */
+ zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table));
+
+out:
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_del(&zdev->kzdev->entry);
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+ kvm_s390_pci_dev_release(zdev);
+
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+
+ kvm_put_kvm(kvm);
+}
+
+void kvm_s390_pci_init_list(struct kvm *kvm)
+{
+ spin_lock_init(&kvm->arch.kzdev_list_lock);
+ INIT_LIST_HEAD(&kvm->arch.kzdev_list);
+}
+
+void kvm_s390_pci_clear_list(struct kvm *kvm)
+{
+ /*
+ * This list should already be empty, either via vfio device closures
+ * or kvm fd cleanup.
+ */
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+}
+
+static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
+{
+ struct zpci_dev *zdev = NULL;
+ struct kvm_zdev *kzdev;
+
+ spin_lock(&kvm->arch.kzdev_list_lock);
+ list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
+ if (kzdev->zdev->fh == fh) {
+ zdev = kzdev->zdev;
+ break;
+ }
+ }
+ spin_unlock(&kvm->arch.kzdev_list_lock);
+
+ return zdev;
+}
+
+static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
+ struct kvm_s390_zpci_op *args)
+{
+ struct zpci_fib fib = {};
+ bool hostflag;
+
+ fib.fmt0.aibv = args->u.reg_aen.ibv;
+ fib.fmt0.isc = args->u.reg_aen.isc;
+ fib.fmt0.noi = args->u.reg_aen.noi;
+ if (args->u.reg_aen.sb != 0) {
+ fib.fmt0.aisb = args->u.reg_aen.sb;
+ fib.fmt0.aisbo = args->u.reg_aen.sbo;
+ fib.fmt0.sum = 1;
+ } else {
+ fib.fmt0.aisb = 0;
+ fib.fmt0.aisbo = 0;
+ fib.fmt0.sum = 0;
+ }
+
+ hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
+ return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
+}
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
+{
+ struct kvm_zdev *kzdev;
+ struct zpci_dev *zdev;
+ int r;
+
+ zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
+ if (!zdev)
+ return -ENODEV;
+
+ mutex_lock(&zdev->kzdev_lock);
+ mutex_lock(&kvm->lock);
+
+ kzdev = zdev->kzdev;
+ if (!kzdev) {
+ r = -ENODEV;
+ goto out;
+ }
+ if (kzdev->kvm != kvm) {
+ r = -EPERM;
+ goto out;
+ }
+
+ switch (args->op) {
+ case KVM_S390_ZPCIOP_REG_AEN:
+ /* Fail on unknown flags */
+ if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
+ r = -EINVAL;
+ break;
+ }
+ r = kvm_s390_pci_zpci_reg_aen(zdev, args);
+ break;
+ case KVM_S390_ZPCIOP_DEREG_AEN:
+ r = kvm_s390_pci_aif_disable(zdev, false);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&zdev->kzdev_lock);
+ return r;
+}
+
+int kvm_s390_pci_init(void)
+{
+ zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+ zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
+
+ if (!kvm_s390_pci_interp_allowed())
+ return 0;
+
+ aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
+ if (!aift)
+ return -ENOMEM;
+
+ spin_lock_init(&aift->gait_lock);
+ mutex_init(&aift->aift_lock);
+
+ return 0;
+}
+
+void kvm_s390_pci_exit(void)
+{
+ zpci_kvm_hook.kvm_register = NULL;
+ zpci_kvm_hook.kvm_unregister = NULL;
+
+ if (!kvm_s390_pci_interp_allowed())
+ return;
+
+ mutex_destroy(&aift->aift_lock);
+
+ kfree(aift);
+}
diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h
new file mode 100644
index 000000000000..486d06ef563f
--- /dev/null
+++ b/arch/s390/kvm/pci.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * s390 kvm PCI passthrough support
+ *
+ * Copyright IBM Corp. 2022
+ *
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef __KVM_S390_PCI_H
+#define __KVM_S390_PCI_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <asm/airq.h>
+#include <asm/cpu.h>
+
+struct kvm_zdev {
+ struct zpci_dev *zdev;
+ struct kvm *kvm;
+ struct zpci_fib fib;
+ struct list_head entry;
+};
+
+struct zpci_gaite {
+ u32 gisa;
+ u8 gisc;
+ u8 count;
+ u8 reserved;
+ u8 aisbo;
+ u64 aisb;
+};
+
+struct zpci_aift {
+ struct zpci_gaite *gait;
+ struct airq_iv *sbv;
+ struct kvm_zdev **kzdev;
+ spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
+ struct mutex aift_lock; /* Protects the other structures in aift */
+};
+
+extern struct zpci_aift *aift;
+
+static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
+ unsigned long si)
+{
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || !aift->kzdev ||
+ !aift->kzdev[si])
+ return NULL;
+ return aift->kzdev[si]->kvm;
+};
+
+int kvm_s390_pci_aen_init(u8 nisc);
+void kvm_s390_pci_aen_exit(void);
+
+void kvm_s390_pci_init_list(struct kvm *kvm);
+void kvm_s390_pci_clear_list(struct kvm *kvm);
+
+int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
+
+int kvm_s390_pci_init(void);
+void kvm_s390_pci_exit(void);
+
+static inline bool kvm_s390_pci_interp_allowed(void)
+{
+ struct cpuid cpu_id;
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2817:
+ case 0x2818:
+ case 0x2827:
+ case 0x2828:
+ case 0x2964:
+ case 0x2965:
+ /* No SHM on certain machines */
+ return false;
+ default:
+ return (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) &&
+ sclp.has_zpci_lsi && sclp.has_aeni && sclp.has_aisi &&
+ sclp.has_aisii);
+ }
+}
+
+#endif /* __KVM_S390_PCI_H */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 417154b314a6..3335fa09b6f1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -11,7 +11,6 @@
#include <linux/kvm.h>
#include <linux/gfp.h>
#include <linux/errno.h>
-#include <linux/compat.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>
@@ -102,7 +101,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod);
- kvm_s390_set_tod_clock(vcpu->kvm, &gtod);
+ /*
+ * To set the TOD clock the kvm lock must be taken, but the vcpu lock
+ * is already held in handle_set_clock. The usual lock order is the
+ * opposite. As SCK is deprecated and should not be used in several
+ * cases, for example when the multiple epoch facility or TOD clock
+ * steering facility is installed (see Principles of Operation), a
+ * slow path can be used. If the lock can not be taken via try_lock,
+ * the instruction will be retried via -EAGAIN at a later point in
+ * time.
+ */
+ if (!kvm_s390_try_set_tod_clock(vcpu->kvm, &gtod)) {
+ kvm_s390_retry_instr(vcpu);
+ return -EAGAIN;
+ }
kvm_s390_set_psw_cc(vcpu, 0);
return 0;
@@ -430,7 +442,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
vcpu->stat.instruction_ipte_interlock++;
if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+ wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu->kvm));
kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
@@ -861,10 +873,18 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- if (fc > 3) {
- kvm_s390_set_psw_cc(vcpu, 3);
- return 0;
- }
+ /* Bailout forbidden function codes */
+ if (fc > 3 && fc != 15)
+ goto out_no_data;
+
+ /*
+ * fc 15 is provided only with
+ * - PTF/CPU topology support through facility 15
+ * - KVM_CAP_S390_USER_STSI
+ */
+ if (fc == 15 && (!test_kvm_facility(vcpu->kvm, 11) ||
+ !vcpu->kvm->arch.user_stsi))
+ goto out_no_data;
if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
|| vcpu->run->s.regs.gprs[1] & 0xffff0000)
@@ -898,6 +918,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
goto out_no_data;
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
+ case 15: /* fc 15 is fully handled in userspace */
+ insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+ trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+ return -EREMOTE;
}
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
@@ -1443,10 +1467,11 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
static int handle_tprot(struct kvm_vcpu *vcpu)
{
- u64 address1, address2;
- unsigned long hva, gpa;
- int ret = 0, cc = 0;
+ u64 address, operand2;
+ unsigned long gpa;
+ u8 access_key;
bool writable;
+ int ret, cc;
u8 ar;
vcpu->stat.instruction_tprot++;
@@ -1454,45 +1479,48 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
+ kvm_s390_get_base_disp_sse(vcpu, &address, &operand2, &ar, NULL);
+ access_key = (operand2 & 0xf0) >> 4;
- /* we only handle the Linux memory detection case:
- * access key == 0
- * everything else goes to userspace. */
- if (address2 & 0xf0)
- return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
- ipte_lock(vcpu);
- ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
- if (ret == PGM_PROTECTION) {
+ ipte_lock(vcpu->kvm);
+
+ ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
+ GACC_STORE, access_key);
+ if (ret == 0) {
+ gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+ } else if (ret == PGM_PROTECTION) {
+ writable = false;
/* Write protected? Try again with read-only... */
- cc = 1;
- ret = guest_translate_address(vcpu, address1, ar, &gpa,
- GACC_FETCH);
+ ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
+ GACC_FETCH, access_key);
}
- if (ret) {
- if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
- ret = kvm_s390_inject_program_int(vcpu, ret);
- } else if (ret > 0) {
- /* Translation not available */
- kvm_s390_set_psw_cc(vcpu, 3);
+ if (ret >= 0) {
+ cc = -1;
+
+ /* Fetching permitted; storing permitted */
+ if (ret == 0 && writable)
+ cc = 0;
+ /* Fetching permitted; storing not permitted */
+ else if (ret == 0 && !writable)
+ cc = 1;
+ /* Fetching not permitted; storing not permitted */
+ else if (ret == PGM_PROTECTION)
+ cc = 2;
+ /* Translation not available */
+ else if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC)
+ cc = 3;
+
+ if (cc != -1) {
+ kvm_s390_set_psw_cc(vcpu, cc);
ret = 0;
+ } else {
+ ret = kvm_s390_inject_program_int(vcpu, ret);
}
- goto out_unlock;
}
- hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
- if (kvm_is_error_hva(hva)) {
- ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- } else {
- if (!writable)
- cc = 1; /* Write not permitted ==> read-only */
- kvm_s390_set_psw_cc(vcpu, cc);
- /* Note: CC2 only occurs for storage keys (not supported yet) */
- }
-out_unlock:
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
return ret;
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 00d272d134c2..7cb7799a0acb 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -7,13 +7,25 @@
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
+static void kvm_s390_clear_pv_state(struct kvm *kvm)
+{
+ kvm->arch.pv.handle = 0;
+ kvm->arch.pv.guest_len = 0;
+ kvm->arch.pv.stor_base = 0;
+ kvm->arch.pv.stor_var = NULL;
+}
+
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
int cc;
@@ -108,7 +120,7 @@ static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
vfree(kvm->arch.pv.stor_var);
free_pages(kvm->arch.pv.stor_base,
get_order(uv_info.guest_base_stor_len));
- memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+ kvm_s390_clear_pv_state(kvm);
}
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
@@ -116,7 +128,6 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
unsigned long base = uv_info.guest_base_stor_len;
unsigned long virt = uv_info.guest_virt_var_stor_len;
unsigned long npages = 0, vlen = 0;
- struct kvm_memory_slot *memslot;
kvm->arch.pv.stor_var = NULL;
kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
@@ -130,8 +141,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
* Slots are sorted by GFN
*/
mutex_lock(&kvm->slots_lock);
- memslot = kvm_memslots(kvm)->memslots;
- npages = memslot->base_gfn + memslot->npages;
+ npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
mutex_unlock(&kvm->slots_lock);
kvm->arch.pv.guest_len = npages * PAGE_SIZE;
@@ -139,12 +149,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
/* Allocate variable storage */
vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
vlen += uv_info.guest_virt_base_stor_len;
- /*
- * The Create Secure Configuration Ultravisor Call does not support
- * using large pages for the virtual memory area.
- * This is a hardware limitation.
- */
- kvm->arch.pv.stor_var = vmalloc_no_huge(vlen);
+ kvm->arch.pv.stor_var = vzalloc(vlen);
if (!kvm->arch.pv.stor_var)
goto out_err;
return 0;
@@ -159,21 +164,51 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
int cc;
- /* make all pages accessible before destroying the guest */
- s390_reset_acc(kvm->mm);
-
cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
- atomic_set(&kvm->mm->context.is_protected, 0);
+ /*
+ * if the mm still has a mapping, make all its pages accessible
+ * before destroying the guest
+ */
+ if (mmget_not_zero(kvm->mm)) {
+ s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+ mmput(kvm->mm);
+ }
+
+ if (!cc) {
+ atomic_dec(&kvm->mm->context.protected_count);
+ kvm_s390_pv_dealloc_vm(kvm);
+ } else {
+ /* Intended memory leak on "impossible" error */
+ s390_replace_asce(kvm->arch.gmap);
+ }
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
- /* Inteded memory leak on "impossible" error */
- if (!cc)
- kvm_s390_pv_dealloc_vm(kvm);
+
return cc ? -EIO : 0;
}
+static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
+ struct mm_struct *mm)
+{
+ struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
+ u16 dummy;
+
+ /*
+ * No locking is needed since this is the last thread of the last user of this
+ * struct mm.
+ * When the struct kvm gets deinitialized, this notifier is also
+ * unregistered. This means that if this notifier runs, then the
+ * struct kvm is still valid.
+ */
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+}
+
+static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
+ .release = kvm_s390_pv_mmu_notifier_release,
+};
+
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
struct uv_cb_cgc uvcb = {
@@ -204,14 +239,22 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
/* Outputs */
kvm->arch.pv.handle = uvcb.guest_handle;
+ atomic_inc(&kvm->mm->context.protected_count);
if (cc) {
- if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+ if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
- else
+ } else {
+ atomic_dec(&kvm->mm->context.protected_count);
kvm_s390_pv_dealloc_vm(kvm);
+ }
return -EIO;
}
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+ /* Add the notifier only once. No races because we hold kvm->lock */
+ if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+ kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+ mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+ }
return 0;
}
@@ -231,8 +274,6 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
*rrc = uvcb.header.rrc;
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
*rc, *rrc);
- if (!cc)
- atomic_set(&kvm->mm->context.is_protected, 1);
return cc ? -EINVAL : 0;
}
@@ -305,3 +346,200 @@ int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
return -EINVAL;
return 0;
}
+
+int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_cpu uvcb = {
+ .header.cmd = UVC_CMD_DUMP_CPU,
+ .header.len = sizeof(uvcb),
+ .cpu_handle = vcpu->arch.pv.handle,
+ .dump_area_origin = (u64)buff,
+ };
+ int cc;
+
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ return cc;
+}
+
+/* Size of the cache for the storage state dump data. 1MB for now */
+#define DUMP_BUFF_LEN HPAGE_SIZE
+
+/**
+ * kvm_s390_pv_dump_stor_state
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @gaddr: Starting absolute guest address for which the storage state
+ * is requested.
+ * @buff_user_len: Length of the buff_user buffer
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Stores buff_len bytes of tweak component values to buff_user
+ * starting with the 1MB block specified by the absolute guest address
+ * (gaddr). The gaddr pointer will be updated with the last address
+ * for which data was written when returning to userspace. buff_user
+ * might be written to even if an error rc is returned. For instance
+ * if we encounter a fault after writing the first page of data.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ * 0 on success
+ * -ENOMEM if allocating the cache fails
+ * -EINVAL if gaddr is not aligned to 1MB
+ * -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
+ * -EINVAL if the UV call fails, rc and rrc will be set in this case
+ * -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+ u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_stor_state uvcb = {
+ .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
+ .header.len = sizeof(uvcb),
+ .config_handle = kvm->arch.pv.handle,
+ .gaddr = *gaddr,
+ .dump_area_origin = 0,
+ };
+ const u64 increment_len = uv_info.conf_dump_storage_state_len;
+ size_t buff_kvm_size;
+ size_t size_done = 0;
+ u8 *buff_kvm = NULL;
+ int cc, ret;
+
+ ret = -EINVAL;
+ /* UV call processes 1MB guest storage chunks at a time */
+ if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
+ goto out;
+
+ /*
+ * We provide the storage state for 1MB chunks of guest
+ * storage. The buffer will need to be aligned to
+ * conf_dump_storage_state_len so we don't end on a partial
+ * chunk.
+ */
+ if (!buff_user_len ||
+ !IS_ALIGNED(buff_user_len, increment_len))
+ goto out;
+
+ /*
+ * Allocate a buffer from which we will later copy to the user
+ * process. We don't want userspace to dictate our buffer size
+ * so we limit it to DUMP_BUFF_LEN.
+ */
+ ret = -ENOMEM;
+ buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
+ buff_kvm = vzalloc(buff_kvm_size);
+ if (!buff_kvm)
+ goto out;
+
+ ret = 0;
+ uvcb.dump_area_origin = (u64)buff_kvm;
+ /* We will loop until the user buffer is filled or an error occurs */
+ do {
+ /* Get 1MB worth of guest storage state data */
+ cc = uv_call_sched(0, (u64)&uvcb);
+
+ /* All or nothing */
+ if (cc) {
+ ret = -EINVAL;
+ break;
+ }
+
+ size_done += increment_len;
+ uvcb.dump_area_origin += increment_len;
+ buff_user_len -= increment_len;
+ uvcb.gaddr += HPAGE_SIZE;
+
+ /* KVM Buffer full, time to copy to the process */
+ if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
+ if (copy_to_user(buff_user, buff_kvm, size_done)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ buff_user += size_done;
+ size_done = 0;
+ uvcb.dump_area_origin = (u64)buff_kvm;
+ }
+ } while (buff_user_len);
+
+ /* Report back where we ended dumping */
+ *gaddr = uvcb.gaddr;
+
+ /* Lets only log errors, we don't want to spam */
+out:
+ if (ret)
+ KVM_UV_EVENT(kvm, 3,
+ "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
+ uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ vfree(buff_kvm);
+
+ return ret;
+}
+
+/**
+ * kvm_s390_pv_dump_complete
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Completes the dumping operation and writes the completion data to
+ * user space.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ * 0 on success
+ * -ENOMEM if allocating the completion buffer fails
+ * -EINVAL if the UV call fails, rc and rrc will be set in this case
+ * -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+ u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_complete complete = {
+ .header.len = sizeof(complete),
+ .header.cmd = UVC_CMD_DUMP_COMPLETE,
+ .config_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ u64 *compl_data;
+ int ret;
+
+ /* Allocate dump area */
+ compl_data = vzalloc(uv_info.conf_dump_finalize_len);
+ if (!compl_data)
+ return -ENOMEM;
+ complete.dump_area_origin = (u64)compl_data;
+
+ ret = uv_call_sched(0, (u64)&complete);
+ *rc = complete.header.rc;
+ *rrc = complete.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
+ complete.header.rc, complete.header.rrc);
+
+ if (!ret) {
+ /*
+ * kvm_s390_pv_dealloc_vm() will also (mem)set
+ * this to false on a reboot or other destroy
+ * operation for this vm.
+ */
+ kvm->arch.pv.dumping = false;
+ kvm_s390_vcpu_unblock_all(kvm);
+ ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
+ if (ret)
+ ret = -EFAULT;
+ }
+ vfree(compl_data);
+ /* If the UVC returned an error, translate it to -EINVAL */
+ if (ret > 0)
+ ret = -EINVAL;
+ return ret;
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf4de80bd541..cb747bf6c798 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -276,6 +276,34 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
+ /*
+ * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
+ * are processed asynchronously. Until the affected VCPU finishes
+ * its work and calls back into KVM to clear the (RESTART or STOP)
+ * interrupt, we need to return any new non-reset orders "busy".
+ *
+ * This is important because a single VCPU could issue:
+ * 1) SIGP STOP $DESTINATION
+ * 2) SIGP SENSE $DESTINATION
+ *
+ * If the SIGP SENSE would not be rejected as "busy", it could
+ * return an incorrect answer as to whether the VCPU is STOPPED
+ * or OPERATING.
+ */
+ if (order_code != SIGP_INITIAL_CPU_RESET &&
+ order_code != SIGP_CPU_RESET) {
+ /*
+ * Lockless check. Both SIGP STOP and SIGP (RE)START
+ * properly synchronize everything while processing
+ * their orders, while the guest cannot observe a
+ * difference when issuing other orders from two
+ * different VCPUs.
+ */
+ if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
+ kvm_s390_is_restart_irq_pending(dst_vcpu))
+ return SIGP_CC_BUSY;
+ }
+
switch (order_code) {
case SIGP_SENSE:
vcpu->stat.instruction_sigp_sense++;
@@ -452,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
struct kvm_vcpu *dest_vcpu;
u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
-
if (order_code == SIGP_EXTERNAL_CALL) {
+ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index acda4b6fc851..94138f8f0c1c 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -503,6 +503,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* Host-protection-interruption introduced with ESOP */
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
+ /*
+ * CPU Topology
+ * This facility only uses the utility field of the SCA and none of
+ * the cpu entries that are problematic with the other interpretation
+ * facilities so we can pass it through
+ */
+ if (test_kvm_facility(vcpu->kvm, 11))
+ scb_s->ecb |= scb_o->ecb & ECB_PTF;
/* transactional execution */
if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
/* remap the prefix is tx is toggled on */
@@ -1091,7 +1099,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
handle_last_fault(vcpu, vsie_page);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
/* save current guest state of bp isolation override */
guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
@@ -1133,7 +1141,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (!guest_bp_isolation)
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (rc == -EINTR) {
VCPU_EVENT(vcpu, 3, "%s", "machine check");
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 707cd4622c13..580d2e3265cb 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -17,4 +17,9 @@ KASAN_SANITIZE_uaccess.o := n
obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
+obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
+
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index bccbf394ae7e..be14c58cb989 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -4,7 +4,6 @@
*
* Copyright IBM Corp. 1999, 2008
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
- * Heiko Carstens <heiko.carstens@de.ibm.com>,
*/
#include <linux/processor.h>
@@ -14,13 +13,10 @@
void __delay(unsigned long loops)
{
- /*
- * To end the bloody studid and useless discussion about the
- * BogoMips number I took the liberty to define the __delay
- * function in a way that that resulting BogoMips number will
- * yield the megahertz number of the cpu. The important function
- * is udelay and that is done using the tod clock. -- martin.
- */
+ /*
+ * Loop 'loops' times. Callers must not assume a specific
+ * amount of time passes before this function returns.
+ */
asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
}
EXPORT_SYMBOL(__delay);
diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile
new file mode 100644
index 000000000000..854631d9cb03
--- /dev/null
+++ b/arch/s390/lib/expoline/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += expoline.o
diff --git a/arch/s390/lib/expoline/expoline.S b/arch/s390/lib/expoline/expoline.S
new file mode 100644
index 000000000000..92ed8409a7a4
--- /dev/null
+++ b/arch/s390/lib/expoline/expoline.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/nospec-insn.h>
+#include <linux/linkage.h>
+
+.macro GEN_ALL_BR_THUNK_EXTERN
+ .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ GEN_BR_THUNK_EXTERN %r\r1
+ .endr
+.endm
+
+GEN_ALL_BR_THUNK_EXTERN
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 692dc84cd19c..04d4c6cf898e 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
int owner;
asm_inline volatile(
- ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
return owner;
@@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
int expected = old;
asm_inline volatile(
- ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
new file mode 100644
index 000000000000..9894009fc1f2
--- /dev/null
+++ b/arch/s390/lib/test_modules.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <kunit/test.h>
+#include <linux/module.h>
+
+#include "test_modules.h"
+
+/*
+ * Test that modules with many relocations are loaded properly.
+ */
+static void test_modules_many_vmlinux_relocs(struct kunit *test)
+{
+ int result = 0;
+
+#define CALL_RETURN(i) result += test_modules_return_ ## i()
+ REPEAT_10000(CALL_RETURN);
+ KUNIT_ASSERT_EQ(test, result, 49995000);
+}
+
+static struct kunit_case modules_testcases[] = {
+ KUNIT_CASE(test_modules_many_vmlinux_relocs),
+ {}
+};
+
+static struct kunit_suite modules_test_suite = {
+ .name = "modules_test_s390",
+ .test_cases = modules_testcases,
+};
+
+kunit_test_suites(&modules_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h
new file mode 100644
index 000000000000..6371fcf17684
--- /dev/null
+++ b/arch/s390/lib/test_modules.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef TEST_MODULES_H
+#define TEST_MODULES_H
+
+#define __REPEAT_10000_3(f, x) \
+ f(x ## 0); \
+ f(x ## 1); \
+ f(x ## 2); \
+ f(x ## 3); \
+ f(x ## 4); \
+ f(x ## 5); \
+ f(x ## 6); \
+ f(x ## 7); \
+ f(x ## 8); \
+ f(x ## 9)
+#define __REPEAT_10000_2(f, x) \
+ __REPEAT_10000_3(f, x ## 0); \
+ __REPEAT_10000_3(f, x ## 1); \
+ __REPEAT_10000_3(f, x ## 2); \
+ __REPEAT_10000_3(f, x ## 3); \
+ __REPEAT_10000_3(f, x ## 4); \
+ __REPEAT_10000_3(f, x ## 5); \
+ __REPEAT_10000_3(f, x ## 6); \
+ __REPEAT_10000_3(f, x ## 7); \
+ __REPEAT_10000_3(f, x ## 8); \
+ __REPEAT_10000_3(f, x ## 9)
+#define __REPEAT_10000_1(f, x) \
+ __REPEAT_10000_2(f, x ## 0); \
+ __REPEAT_10000_2(f, x ## 1); \
+ __REPEAT_10000_2(f, x ## 2); \
+ __REPEAT_10000_2(f, x ## 3); \
+ __REPEAT_10000_2(f, x ## 4); \
+ __REPEAT_10000_2(f, x ## 5); \
+ __REPEAT_10000_2(f, x ## 6); \
+ __REPEAT_10000_2(f, x ## 7); \
+ __REPEAT_10000_2(f, x ## 8); \
+ __REPEAT_10000_2(f, x ## 9)
+#define REPEAT_10000(f) \
+ __REPEAT_10000_1(f, 0); \
+ __REPEAT_10000_1(f, 1); \
+ __REPEAT_10000_1(f, 2); \
+ __REPEAT_10000_1(f, 3); \
+ __REPEAT_10000_1(f, 4); \
+ __REPEAT_10000_1(f, 5); \
+ __REPEAT_10000_1(f, 6); \
+ __REPEAT_10000_1(f, 7); \
+ __REPEAT_10000_1(f, 8); \
+ __REPEAT_10000_1(f, 9)
+
+#define DECLARE_RETURN(i) int test_modules_return_ ## i(void)
+REPEAT_10000(DECLARE_RETURN);
+
+#endif
diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c
new file mode 100644
index 000000000000..1670349a03eb
--- /dev/null
+++ b/arch/s390/lib/test_modules_helpers.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/export.h>
+
+#include "test_modules.h"
+
+#define DEFINE_RETURN(i) \
+ int test_modules_return_ ## i(void) \
+ { \
+ return 1 ## i - 10000; \
+ } \
+ EXPORT_SYMBOL_GPL(test_modules_return_ ## i)
+REPEAT_10000(DEFINE_RETURN);
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index bc7973359ae2..5a053b393d5c 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -8,6 +8,7 @@
#include <linux/completion.h>
#include <linux/kallsyms.h>
#include <linux/kthread.h>
+#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/slab.h>
@@ -16,10 +17,14 @@
#include <linux/wait.h>
#include <asm/irq.h>
-struct kunit *current_test;
+static struct kunit *current_test;
#define BT_BUF_SIZE (PAGE_SIZE * 4)
+static bool force_bt;
+module_param_named(backtrace, force_bt, bool, 0444);
+MODULE_PARM_DESC(backtrace, "print backtraces for all tests");
+
/*
* To avoid printk line limit split backtrace by lines
*/
@@ -42,7 +47,7 @@ static void print_backtrace(char *bt)
static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
unsigned long sp)
{
- int frame_count, prev_is_func2, seen_func2_func1;
+ int frame_count, prev_is_func2, seen_func2_func1, seen_kretprobe_trampoline;
const int max_frames = 128;
struct unwind_state state;
size_t bt_pos = 0;
@@ -58,6 +63,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
frame_count = 0;
prev_is_func2 = 0;
seen_func2_func1 = 0;
+ seen_kretprobe_trampoline = 0;
unwind_for_each_frame(&state, task, regs, sp) {
unsigned long addr = unwind_get_return_address(&state);
char sym[KSYM_SYMBOL_LEN];
@@ -83,6 +89,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
seen_func2_func1 = 1;
prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
+ if (str_has_prefix(sym, "__kretprobe_trampoline+0x0/"))
+ seen_kretprobe_trampoline = 1;
}
/* Check the results. */
@@ -98,7 +106,11 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
kunit_err(current_test, "Maximum number of frames exceeded\n");
ret = -EINVAL;
}
- if (ret)
+ if (seen_kretprobe_trampoline) {
+ kunit_err(current_test, "__kretprobe_trampoline+0x0 in unwinding results\n");
+ ret = -EINVAL;
+ }
+ if (ret || force_bt)
print_backtrace(bt);
kfree(bt);
return ret;
@@ -124,20 +136,87 @@ static struct unwindme *unwindme;
#define UWM_CALLER 0x8 /* Unwind starting from caller. */
#define UWM_SWITCH_STACK 0x10 /* Use call_on_stack. */
#define UWM_IRQ 0x20 /* Unwind from irq context. */
-#define UWM_PGM 0x40 /* Unwind from program check handler. */
+#define UWM_PGM 0x40 /* Unwind from program check handler */
+#define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */
+#define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */
+#define UWM_KRETPROBE 0x200 /* Unwind through kretprobed function. */
+#define UWM_KRETPROBE_HANDLER 0x400 /* Unwind from kretprobe handler. */
-static __always_inline unsigned long get_psw_addr(void)
+static __always_inline struct pt_regs fake_pt_regs(void)
{
- unsigned long psw_addr;
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+ regs.gprs[15] = current_stack_pointer;
asm volatile(
"basr %[psw_addr],0\n"
- : [psw_addr] "=d" (psw_addr));
- return psw_addr;
+ : [psw_addr] "=d" (regs.psw.addr));
+ return regs;
+}
+
+static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ struct unwindme *u = unwindme;
+
+ if (!(u->flags & UWM_KRETPROBE_HANDLER))
+ return 0;
+
+ u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+ (u->flags & UWM_SP) ? u->sp : 0);
+
+ return 0;
+}
+
+static noinline notrace int test_unwind_kretprobed_func(struct unwindme *u)
+{
+ struct pt_regs regs;
+
+ if (!(u->flags & UWM_KRETPROBE))
+ return 0;
+
+ regs = fake_pt_regs();
+ return test_unwind(NULL, (u->flags & UWM_REGS) ? &regs : NULL,
+ (u->flags & UWM_SP) ? u->sp : 0);
+}
+
+static noinline int test_unwind_kretprobed_func_caller(struct unwindme *u)
+{
+ return test_unwind_kretprobed_func(u);
}
-#ifdef CONFIG_KPROBES
-static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs)
+static int test_unwind_kretprobe(struct unwindme *u)
+{
+ int ret;
+ struct kretprobe my_kretprobe;
+
+ if (!IS_ENABLED(CONFIG_KPROBES))
+ kunit_skip(current_test, "requires CONFIG_KPROBES");
+
+ u->ret = -1; /* make sure kprobe is called */
+ unwindme = u;
+
+ memset(&my_kretprobe, 0, sizeof(my_kretprobe));
+ my_kretprobe.handler = kretprobe_ret_handler;
+ my_kretprobe.maxactive = 1;
+ my_kretprobe.kp.addr = (kprobe_opcode_t *)test_unwind_kretprobed_func;
+
+ ret = register_kretprobe(&my_kretprobe);
+
+ if (ret < 0) {
+ kunit_err(current_test, "register_kretprobe failed %d\n", ret);
+ return -EINVAL;
+ }
+
+ ret = test_unwind_kretprobed_func_caller(u);
+ unregister_kretprobe(&my_kretprobe);
+ unwindme = NULL;
+ if (u->flags & UWM_KRETPROBE_HANDLER)
+ ret = u->ret;
+ return ret;
+}
+
+static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct unwindme *u = unwindme;
@@ -145,8 +224,97 @@ static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs)
(u->flags & UWM_SP) ? u->sp : 0);
return 0;
}
+
+extern const char test_unwind_kprobed_insn[];
+
+static noinline void test_unwind_kprobed_func(void)
+{
+ asm volatile(
+ " nopr %%r7\n"
+ "test_unwind_kprobed_insn:\n"
+ " nopr %%r7\n"
+ :);
+}
+
+static int test_unwind_kprobe(struct unwindme *u)
+{
+ struct kprobe kp;
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_KPROBES))
+ kunit_skip(current_test, "requires CONFIG_KPROBES");
+ if (!IS_ENABLED(CONFIG_KPROBES_ON_FTRACE) && u->flags & UWM_KPROBE_ON_FTRACE)
+ kunit_skip(current_test, "requires CONFIG_KPROBES_ON_FTRACE");
+
+ u->ret = -1; /* make sure kprobe is called */
+ unwindme = u;
+ memset(&kp, 0, sizeof(kp));
+ kp.pre_handler = kprobe_pre_handler;
+ kp.addr = u->flags & UWM_KPROBE_ON_FTRACE ?
+ (kprobe_opcode_t *)test_unwind_kprobed_func :
+ (kprobe_opcode_t *)test_unwind_kprobed_insn;
+ ret = register_kprobe(&kp);
+ if (ret < 0) {
+ kunit_err(current_test, "register_kprobe failed %d\n", ret);
+ return -EINVAL;
+ }
+
+ test_unwind_kprobed_func();
+ unregister_kprobe(&kp);
+ unwindme = NULL;
+ return u->ret;
+}
+
+static void notrace __used test_unwind_ftrace_handler(unsigned long ip,
+ unsigned long parent_ip,
+ struct ftrace_ops *fops,
+ struct ftrace_regs *fregs)
+{
+ struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2];
+
+ u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL,
+ (u->flags & UWM_SP) ? u->sp : 0);
+}
+
+static noinline int test_unwind_ftraced_func(struct unwindme *u)
+{
+ return READ_ONCE(u)->ret;
+}
+
+static int test_unwind_ftrace(struct unwindme *u)
+{
+ int ret;
+#ifdef CONFIG_DYNAMIC_FTRACE
+ struct ftrace_ops *fops;
+
+ fops = kunit_kzalloc(current_test, sizeof(*fops), GFP_KERNEL);
+ fops->func = test_unwind_ftrace_handler;
+ fops->flags = FTRACE_OPS_FL_DYNAMIC |
+ FTRACE_OPS_FL_RECURSION |
+ FTRACE_OPS_FL_SAVE_REGS |
+ FTRACE_OPS_FL_PERMANENT;
+#else
+ kunit_skip(current_test, "requires CONFIG_DYNAMIC_FTRACE");
#endif
+ ret = ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 0, 0);
+ if (ret) {
+ kunit_err(current_test, "failed to set ftrace filter (%d)\n", ret);
+ return -1;
+ }
+
+ ret = register_ftrace_function(fops);
+ if (!ret) {
+ ret = test_unwind_ftraced_func(u);
+ unregister_ftrace_function(fops);
+ } else {
+ kunit_err(current_test, "failed to register ftrace handler (%d)\n", ret);
+ }
+
+ ftrace_set_filter_ip(fops, (unsigned long)test_unwind_ftraced_func, 1, 0);
+ return ret;
+}
+
/* This function may or may not appear in the backtrace. */
static noinline int unwindme_func4(struct unwindme *u)
{
@@ -157,41 +325,15 @@ static noinline int unwindme_func4(struct unwindme *u)
wait_event(u->task_wq, kthread_should_park());
kthread_parkme();
return 0;
-#ifdef CONFIG_KPROBES
- } else if (u->flags & UWM_PGM) {
- struct kprobe kp;
- int ret;
-
- unwindme = u;
- memset(&kp, 0, sizeof(kp));
- kp.symbol_name = "do_report_trap";
- kp.pre_handler = pgm_pre_handler;
- ret = register_kprobe(&kp);
- if (ret < 0) {
- kunit_err(current_test, "register_kprobe failed %d\n", ret);
- return -EINVAL;
- }
-
- /*
- * Trigger operation exception; use insn notation to bypass
- * llvm's integrated assembler sanity checks.
- */
- asm volatile(
- " .insn e,0x0000\n" /* illegal opcode */
- "0: nopr %%r7\n"
- EX_TABLE(0b, 0b)
- :);
-
- unregister_kprobe(&kp);
- unwindme = NULL;
- return u->ret;
-#endif
+ } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) {
+ return test_unwind_kprobe(u);
+ } else if (u->flags & (UWM_KRETPROBE | UWM_KRETPROBE_HANDLER)) {
+ return test_unwind_kretprobe(u);
+ } else if (u->flags & UWM_FTRACE) {
+ return test_unwind_ftrace(u);
} else {
- struct pt_regs regs;
+ struct pt_regs regs = fake_pt_regs();
- memset(&regs, 0, sizeof(regs));
- regs.psw.addr = get_psw_addr();
- regs.gprs[15] = current_stack_pointer();
return test_unwind(NULL,
(u->flags & UWM_REGS) ? &regs : NULL,
(u->flags & UWM_SP) ? u->sp : 0);
@@ -255,7 +397,7 @@ static int test_unwind_irq(struct unwindme *u)
}
/* Spawns a task and passes it to test_unwind(). */
-static int test_unwind_task(struct kunit *test, struct unwindme *u)
+static int test_unwind_task(struct unwindme *u)
{
struct task_struct *task;
int ret;
@@ -270,7 +412,7 @@ static int test_unwind_task(struct kunit *test, struct unwindme *u)
*/
task = kthread_run(unwindme_func1, u, "%s", __func__);
if (IS_ERR(task)) {
- kunit_err(test, "kthread_run() failed\n");
+ kunit_err(current_test, "kthread_run() failed\n");
return PTR_ERR(task);
}
/*
@@ -293,49 +435,47 @@ struct test_params {
/*
* Create required parameter list for tests
*/
+#define TEST_WITH_FLAGS(f) { .flags = f, .name = #f }
static const struct test_params param_list[] = {
- {.flags = UWM_DEFAULT, .name = "UWM_DEFAULT"},
- {.flags = UWM_SP, .name = "UWM_SP"},
- {.flags = UWM_REGS, .name = "UWM_REGS"},
- {.flags = UWM_SWITCH_STACK,
- .name = "UWM_SWITCH_STACK"},
- {.flags = UWM_SP | UWM_REGS,
- .name = "UWM_SP | UWM_REGS"},
- {.flags = UWM_CALLER | UWM_SP,
- .name = "WM_CALLER | UWM_SP"},
- {.flags = UWM_CALLER | UWM_SP | UWM_REGS,
- .name = "UWM_CALLER | UWM_SP | UWM_REGS"},
- {.flags = UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK,
- .name = "UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"},
- {.flags = UWM_THREAD, .name = "UWM_THREAD"},
- {.flags = UWM_THREAD | UWM_SP,
- .name = "UWM_THREAD | UWM_SP"},
- {.flags = UWM_THREAD | UWM_CALLER | UWM_SP,
- .name = "UWM_THREAD | UWM_CALLER | UWM_SP"},
- {.flags = UWM_IRQ, .name = "UWM_IRQ"},
- {.flags = UWM_IRQ | UWM_SWITCH_STACK,
- .name = "UWM_IRQ | UWM_SWITCH_STACK"},
- {.flags = UWM_IRQ | UWM_SP,
- .name = "UWM_IRQ | UWM_SP"},
- {.flags = UWM_IRQ | UWM_REGS,
- .name = "UWM_IRQ | UWM_REGS"},
- {.flags = UWM_IRQ | UWM_SP | UWM_REGS,
- .name = "UWM_IRQ | UWM_SP | UWM_REGS"},
- {.flags = UWM_IRQ | UWM_CALLER | UWM_SP,
- .name = "UWM_IRQ | UWM_CALLER | UWM_SP"},
- {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS,
- .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS"},
- {.flags = UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK,
- .name = "UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK"},
- #ifdef CONFIG_KPROBES
- {.flags = UWM_PGM, .name = "UWM_PGM"},
- {.flags = UWM_PGM | UWM_SP,
- .name = "UWM_PGM | UWM_SP"},
- {.flags = UWM_PGM | UWM_REGS,
- .name = "UWM_PGM | UWM_REGS"},
- {.flags = UWM_PGM | UWM_SP | UWM_REGS,
- .name = "UWM_PGM | UWM_SP | UWM_REGS"},
- #endif
+ TEST_WITH_FLAGS(UWM_DEFAULT),
+ TEST_WITH_FLAGS(UWM_SP),
+ TEST_WITH_FLAGS(UWM_REGS),
+ TEST_WITH_FLAGS(UWM_SWITCH_STACK),
+ TEST_WITH_FLAGS(UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_CALLER | UWM_SP),
+ TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK),
+ TEST_WITH_FLAGS(UWM_THREAD),
+ TEST_WITH_FLAGS(UWM_THREAD | UWM_SP),
+ TEST_WITH_FLAGS(UWM_THREAD | UWM_CALLER | UWM_SP),
+ TEST_WITH_FLAGS(UWM_IRQ),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_SWITCH_STACK),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_SP),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK),
+ TEST_WITH_FLAGS(UWM_PGM),
+ TEST_WITH_FLAGS(UWM_PGM | UWM_SP),
+ TEST_WITH_FLAGS(UWM_PGM | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_PGM | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE),
+ TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP),
+ TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_KPROBE_ON_FTRACE | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_FTRACE),
+ TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP),
+ TEST_WITH_FLAGS(UWM_FTRACE | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_FTRACE | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_KRETPROBE),
+ TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP),
+ TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER),
+ TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP),
+ TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_REGS),
+ TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP | UWM_REGS),
};
/*
@@ -360,7 +500,7 @@ static void test_unwind_flags(struct kunit *test)
params = (const struct test_params *)test->param_value;
u.flags = params->flags;
if (u.flags & UWM_THREAD)
- KUNIT_EXPECT_EQ(test, 0, test_unwind_task(test, &u));
+ KUNIT_EXPECT_EQ(test, 0, test_unwind_task(&u));
else if (u.flags & UWM_IRQ)
KUNIT_EXPECT_EQ(test, 0, test_unwind_irq(&u));
else
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index a596e69d3c47..720036fb1924 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -8,13 +8,10 @@
* Gerald Schaefer (gerald.schaefer@de.ibm.com)
*/
-#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
-#include <linux/errno.h>
#include <linux/mm.h>
-#include <asm/mmu_context.h>
-#include <asm/facility.h>
+#include <asm/asm-extable.h>
#ifdef CONFIG_DEBUG_ENTRY
void debug_user_asce(int exit)
@@ -34,39 +31,21 @@ void debug_user_asce(int exit)
}
#endif /*CONFIG_DEBUG_ENTRY */
-#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
-static DEFINE_STATIC_KEY_FALSE(have_mvcos);
-
-static int __init uaccess_init(void)
-{
- if (test_facility(27))
- static_branch_enable(&have_mvcos);
- return 0;
-}
-early_initcall(uaccess_init);
-
-static inline int copy_with_mvcos(void)
-{
- if (static_branch_likely(&have_mvcos))
- return 1;
- return 0;
-}
-#else
-static inline int copy_with_mvcos(void)
-{
- return 1;
-}
-#endif
-
-static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
- unsigned long size)
+static unsigned long raw_copy_from_user_key(void *to, const void __user *from,
+ unsigned long size, unsigned long key)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac2.key = key,
+ .oac2.as = PSW_BITS_AS_SECONDARY,
+ .oac2.k = 1,
+ .oac2.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " lghi 0,%[spec]\n"
- "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+ " lr 0,%[spec]\n"
+ "0: mvcos 0(%2),0(%1),%0\n"
"6: jz 4f\n"
"1: algr %0,%3\n"
" slgr %1,%3\n"
@@ -77,69 +56,56 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr
" slgr %4,%1\n"
" clgr %0,%4\n" /* copy crosses next page boundary? */
" jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+ "3: mvcos 0(%2),0(%1),%4\n"
"7: slgr %0,%4\n"
" j 5f\n"
"4: slgr %0,%0\n"
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : [spec] "K" (0x81UL)
+ : "+a" (size), "+a" (from), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+ : [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
-static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
- unsigned long size)
+unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
- unsigned long tmp1, tmp2;
-
- tmp1 = -256UL;
- asm volatile(
- " sacf 0\n"
- "0: mvcp 0(%0,%2),0(%1),%3\n"
- "7: jz 5f\n"
- "1: algr %0,%3\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "2: mvcp 0(%0,%2),0(%1),%3\n"
- "8: jnz 1b\n"
- " j 5f\n"
- "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
- " lghi %3,-4096\n"
- " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
- " slgr %4,%1\n"
- " clgr %0,%4\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "4: mvcp 0(%4,%2),0(%1),%3\n"
- "9: slgr %0,%4\n"
- " j 6f\n"
- "5: slgr %0,%0\n"
- "6: sacf 768\n"
- EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
- EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
+ return raw_copy_from_user_key(to, from, n, 0);
}
+EXPORT_SYMBOL(raw_copy_from_user);
-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+unsigned long _copy_from_user_key(void *to, const void __user *from,
+ unsigned long n, unsigned long key)
{
- if (copy_with_mvcos())
- return copy_from_user_mvcos(to, from, n);
- return copy_from_user_mvcp(to, from, n);
+ unsigned long res = n;
+
+ might_fault();
+ if (!should_fail_usercopy()) {
+ instrument_copy_from_user_before(to, from, n);
+ res = raw_copy_from_user_key(to, from, n, key);
+ instrument_copy_from_user_after(to, from, n, res);
+ }
+ if (unlikely(res))
+ memset(to + (n - res), 0, res);
+ return res;
}
-EXPORT_SYMBOL(raw_copy_from_user);
+EXPORT_SYMBOL(_copy_from_user_key);
-static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
- unsigned long size)
+static unsigned long raw_copy_to_user_key(void __user *to, const void *from,
+ unsigned long size, unsigned long key)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac1.key = key,
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.k = 1,
+ .oac1.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " llilh 0,%[spec]\n"
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ " lr 0,%[spec]\n"
+ "0: mvcos 0(%1),0(%2),%0\n"
"6: jz 4f\n"
"1: algr %0,%3\n"
" slgr %1,%3\n"
@@ -150,69 +116,48 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
" slgr %4,%1\n"
" clgr %0,%4\n" /* copy crosses next page boundary? */
" jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+ "3: mvcos 0(%1),0(%2),%4\n"
"7: slgr %0,%4\n"
" j 5f\n"
"4: slgr %0,%0\n"
"5:\n"
EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : [spec] "K" (0x81UL)
+ : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+ : [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
-static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
- unsigned long size)
+unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- unsigned long tmp1, tmp2;
-
- tmp1 = -256UL;
- asm volatile(
- " sacf 0\n"
- "0: mvcs 0(%0,%1),0(%2),%3\n"
- "7: jz 5f\n"
- "1: algr %0,%3\n"
- " la %1,256(%1)\n"
- " la %2,256(%2)\n"
- "2: mvcs 0(%0,%1),0(%2),%3\n"
- "8: jnz 1b\n"
- " j 5f\n"
- "3: la %4,255(%1)\n" /* %4 = ptr + 255 */
- " lghi %3,-4096\n"
- " nr %4,%3\n" /* %4 = (ptr + 255) & -4096 */
- " slgr %4,%1\n"
- " clgr %0,%4\n" /* copy crosses next page boundary? */
- " jnh 6f\n"
- "4: mvcs 0(%4,%1),0(%2),%3\n"
- "9: slgr %0,%4\n"
- " j 6f\n"
- "5: slgr %0,%0\n"
- "6: sacf 768\n"
- EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
- EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
- : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
+ return raw_copy_to_user_key(to, from, n, 0);
}
+EXPORT_SYMBOL(raw_copy_to_user);
-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+unsigned long _copy_to_user_key(void __user *to, const void *from,
+ unsigned long n, unsigned long key)
{
- if (copy_with_mvcos())
- return copy_to_user_mvcos(to, from, n);
- return copy_to_user_mvcs(to, from, n);
+ might_fault();
+ if (should_fail_usercopy())
+ return n;
+ instrument_copy_to_user(to, from, n);
+ return raw_copy_to_user_key(to, from, n, key);
}
-EXPORT_SYMBOL(raw_copy_to_user);
+EXPORT_SYMBOL(_copy_to_user_key);
-static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+unsigned long __clear_user(void __user *to, unsigned long size)
{
unsigned long tmp1, tmp2;
+ union oac spec = {
+ .oac1.as = PSW_BITS_AS_SECONDARY,
+ .oac1.a = 1,
+ };
tmp1 = -4096UL;
asm volatile(
- " llilh 0,%[spec]\n"
- "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
- " jz 4f\n"
+ " lr 0,%[spec]\n"
+ "0: mvcos 0(%1),0(%4),%0\n"
+ "6: jz 4f\n"
"1: algr %0,%2\n"
" slgr %1,%2\n"
" j 0b\n"
@@ -221,57 +166,15 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
" slgr %3,%1\n"
" clgr %0,%3\n" /* copy crosses next page boundary? */
" jnh 5f\n"
- "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
- " slgr %0,%3\n"
+ "3: mvcos 0(%1),0(%4),%3\n"
+ "7: slgr %0,%3\n"
" j 5f\n"
"4: slgr %0,%0\n"
"5:\n"
- EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b)
: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
- : "a" (empty_zero_page), [spec] "K" (0x81UL)
+ : "a" (empty_zero_page), [spec] "d" (spec.val)
: "cc", "memory", "0");
return size;
}
-
-static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
-{
- unsigned long tmp1, tmp2;
-
- asm volatile(
- " sacf 256\n"
- " aghi %0,-1\n"
- " jo 5f\n"
- " bras %3,3f\n"
- " xc 0(1,%1),0(%1)\n"
- "0: aghi %0,257\n"
- " la %2,255(%1)\n" /* %2 = ptr + 255 */
- " srl %2,12\n"
- " sll %2,12\n" /* %2 = (ptr + 255) & -4096 */
- " slgr %2,%1\n"
- " clgr %0,%2\n" /* clear crosses next page boundary? */
- " jnh 5f\n"
- " aghi %2,-1\n"
- "1: ex %2,0(%3)\n"
- " aghi %2,1\n"
- " slgr %0,%2\n"
- " j 5f\n"
- "2: xc 0(256,%1),0(%1)\n"
- " la %1,256(%1)\n"
- "3: aghi %0,-256\n"
- " jnm 2b\n"
- "4: ex %0,0(%3)\n"
- "5: slgr %0,%0\n"
- "6: sacf 768\n"
- EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
- : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
- : : "cc", "memory");
- return size;
-}
-
-unsigned long __clear_user(void __user *to, unsigned long size)
-{
- if (copy_with_mvcos())
- return clear_user_mvcos(to, size);
- return clear_user_xc(to, size);
-}
EXPORT_SYMBOL(__clear_user);
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
index a963c3d8ad0d..fb924a8041dc 100644
--- a/arch/s390/lib/xor.c
+++ b/arch/s390/lib/xor.c
@@ -11,7 +11,8 @@
#include <linux/raid/xor.h>
#include <asm/xor.h>
-static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
{
asm volatile(
" larl 1,2f\n"
@@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
: "0", "1", "cc", "memory");
}
-static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
+static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
{
asm volatile(
" larl 1,2f\n"
@@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: : "0", "1", "cc", "memory");
}
-static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
+static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
{
asm volatile(
" larl 1,2f\n"
@@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
: : "0", "1", "cc", "memory");
}
-static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
+static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
{
asm volatile(
" larl 1,2f\n"
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index cd67e94c16aa..57e4f3a24829 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,7 +4,7 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y += page-states.o pageattr.o pgtable.o pgalloc.o
+obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 2203164b39da..9141ed4c52e9 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -90,7 +90,7 @@ static long cmm_alloc_pages(long nr, long *counter,
} else
free_page((unsigned long) npa);
}
- diag10_range(addr >> PAGE_SHIFT, 1);
+ diag10_range(virt_to_pfn(addr), 1);
pa->pages[pa->index++] = addr;
(*counter)++;
spin_unlock(&cmm_lock);
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9f9af5298dd6..9953819d7959 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -8,8 +8,10 @@
#include <linux/kasan.h>
#include <asm/ptdump.h>
#include <asm/kasan.h>
+#include <asm/abs_lowcore.h>
#include <asm/nospec-branch.h>
#include <asm/sections.h>
+#include <asm/maccess.h>
static unsigned long max_addr;
@@ -21,6 +23,8 @@ struct addr_marker {
enum address_markers_idx {
IDENTITY_BEFORE_NR = 0,
IDENTITY_BEFORE_END_NR,
+ AMODE31_START_NR,
+ AMODE31_END_NR,
KERNEL_START_NR,
KERNEL_END_NR,
#ifdef CONFIG_KFENCE
@@ -39,11 +43,17 @@ enum address_markers_idx {
VMALLOC_END_NR,
MODULES_NR,
MODULES_END_NR,
+ ABS_LOWCORE_NR,
+ ABS_LOWCORE_END_NR,
+ MEMCPY_REAL_NR,
+ MEMCPY_REAL_END_NR,
};
static struct addr_marker address_markers[] = {
[IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
+ [AMODE31_START_NR] = {0, "Amode31 Area Start"},
+ [AMODE31_END_NR] = {0, "Amode31 Area End"},
[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
#ifdef CONFIG_KFENCE
@@ -62,6 +72,10 @@ static struct addr_marker address_markers[] = {
[VMALLOC_END_NR] = {0, "vmalloc Area End"},
[MODULES_NR] = {0, "Modules Area Start"},
[MODULES_END_NR] = {0, "Modules Area End"},
+ [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
+ [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
+ [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
+ [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
{ -1, NULL }
};
@@ -276,8 +290,14 @@ static int pt_dump_init(void)
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
+ address_markers[AMODE31_START_NR].start_address = __samode31;
+ address_markers[AMODE31_END_NR].start_address = __eamode31;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
address_markers[MODULES_END_NR].start_address = MODULES_END;
+ address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
+ address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
+ address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
+ address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
new file mode 100644
index 000000000000..1e4d2187541a
--- /dev/null
+++ b/arch/s390/mm/extable.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitfield.h>
+#include <linux/extable.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/panic.h>
+#include <asm/asm-extable.h>
+#include <asm/extable.h>
+
+const struct exception_table_entry *s390_search_extables(unsigned long addr)
+{
+ const struct exception_table_entry *fixup;
+ size_t num;
+
+ fixup = search_exception_tables(addr);
+ if (fixup)
+ return fixup;
+ num = __stop_amode31_ex_table - __start_amode31_ex_table;
+ return search_extable(__start_amode31_ex_table, num, addr);
+}
+
+static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
+static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+
+ regs->gprs[reg_err] = -EFAULT;
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
+static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+ unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+ size_t len = FIELD_GET(EX_DATA_LEN, ex->data);
+
+ regs->gprs[reg_err] = -EFAULT;
+ memset((void *)regs->gprs[reg_addr], 0, len);
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+ unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+ unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+
+ regs->gprs[reg_err] = -EFAULT;
+ regs->gprs[reg_zero] = 0;
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+}
+
+bool fixup_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *ex;
+
+ ex = s390_search_extables(instruction_pointer(regs));
+ if (!ex)
+ return false;
+ switch (ex->type) {
+ case EX_TYPE_FIXUP:
+ return ex_handler_fixup(ex, regs);
+ case EX_TYPE_BPF:
+ return ex_handler_bpf(ex, regs);
+ case EX_TYPE_UA_STORE:
+ return ex_handler_ua_store(ex, regs);
+ case EX_TYPE_UA_LOAD_MEM:
+ return ex_handler_ua_load_mem(ex, regs);
+ case EX_TYPE_UA_LOAD_REG:
+ return ex_handler_ua_load_reg(ex, regs);
+ }
+ panic("invalid exception table entry");
+}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d30f5986fa85..9649d9382e0a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/kfence.h>
+#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/gmap.h>
@@ -115,7 +116,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_cont("R1:%016lx ", *table);
if (*table & _REGION_ENTRY_INVALID)
goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION2:
table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -124,7 +125,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_cont("R2:%016lx ", *table);
if (*table & _REGION_ENTRY_INVALID)
goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_REGION3:
table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -133,7 +134,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_cont("R3:%016lx ", *table);
if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ table = __va(*table & _REGION_ENTRY_ORIGIN);
fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
@@ -142,7 +143,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address)
pr_cont("S:%016lx ", *table);
if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
goto out;
- table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
}
table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
if (bad_address(table))
@@ -227,27 +228,10 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-const struct exception_table_entry *s390_search_extables(unsigned long addr)
-{
- const struct exception_table_entry *fixup;
-
- fixup = search_extable(__start_amode31_ex_table,
- __stop_amode31_ex_table - __start_amode31_ex_table,
- addr);
- if (!fixup)
- fixup = search_exception_tables(addr);
- return fixup;
-}
-
static noinline void do_no_context(struct pt_regs *regs)
{
- const struct exception_table_entry *fixup;
-
- /* Are we prepared to handle this kernel fault? */
- fixup = s390_search_extables(regs->psw.addr);
- if (fixup && ex_handle(fixup, regs))
+ if (fixup_exception(regs))
return;
-
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
@@ -284,8 +268,7 @@ static noinline void do_sigbus(struct pt_regs *regs)
(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
-static noinline void do_fault_error(struct pt_regs *regs, int access,
- vm_fault_t fault)
+static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
{
int si_code;
@@ -395,7 +378,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if (access == VM_WRITE || is_write)
+ if (is_write)
+ access = VM_WRITE;
+ if (access == VM_WRITE)
flags |= FAULT_FLAG_WRITE;
mmap_read_lock(mm);
@@ -435,8 +420,6 @@ retry:
if (unlikely(!(vma->vm_flags & access)))
goto out_up;
- if (is_vm_hugetlb_page(vma))
- address &= HPAGE_MASK;
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -449,25 +432,37 @@ retry:
goto out_up;
goto out;
}
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED) {
+ if (gmap) {
+ mmap_read_lock(mm);
+ goto out_gmap;
+ }
+ fault = 0;
+ goto out;
+ }
+
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_RETRY) {
- if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
- (flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* FAULT_FLAG_RETRY_NOWAIT has been set,
- * mmap_lock has not been released */
- current->thread.gmap_pfault = 1;
- fault = VM_FAULT_PFAULT;
- goto out_up;
- }
- flags &= ~FAULT_FLAG_RETRY_NOWAIT;
- flags |= FAULT_FLAG_TRIED;
- mmap_read_lock(mm);
- goto retry;
+ if (fault & VM_FAULT_RETRY) {
+ if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
+ (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ /*
+ * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
+ * not been released
+ */
+ current->thread.gmap_pfault = 1;
+ fault = VM_FAULT_PFAULT;
+ goto out_up;
}
+ flags &= ~FAULT_FLAG_RETRY_NOWAIT;
+ flags |= FAULT_FLAG_TRIED;
+ mmap_read_lock(mm);
+ goto retry;
}
+out_gmap:
if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
address = __gmap_link(gmap, current->thread.gmap_addr,
address);
@@ -520,7 +515,7 @@ void do_protection_exception(struct pt_regs *regs)
fault = do_exception(regs, access);
}
if (unlikely(fault))
- do_fault_error(regs, access, fault);
+ do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_protection_exception);
@@ -532,7 +527,7 @@ void do_dat_exception(struct pt_regs *regs)
access = VM_ACCESS_FLAGS;
fault = do_exception(regs, access);
if (unlikely(fault))
- do_fault_error(regs, access, fault);
+ do_fault_error(regs, fault);
}
NOKPROBE_SYMBOL(do_dat_exception);
@@ -770,6 +765,7 @@ void do_secure_storage_access(struct pt_regs *regs)
struct vm_area_struct *vma;
struct mm_struct *mm;
struct page *page;
+ struct gmap *gmap;
int rc;
/*
@@ -799,13 +795,24 @@ void do_secure_storage_access(struct pt_regs *regs)
}
switch (get_fault_type(regs)) {
+ case GMAP_FAULT:
+ mm = current->mm;
+ gmap = (struct gmap *)S390_lowcore.gmap;
+ mmap_read_lock(mm);
+ addr = __gmap_translate(gmap, addr);
+ mmap_read_unlock(mm);
+ if (IS_ERR_VALUE(addr)) {
+ do_fault_error(regs, VM_FAULT_BADMAP);
+ break;
+ }
+ fallthrough;
case USER_FAULT:
mm = current->mm;
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (!vma) {
mmap_read_unlock(mm);
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
break;
}
page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
@@ -827,9 +834,8 @@ void do_secure_storage_access(struct pt_regs *regs)
if (rc)
BUG();
break;
- case GMAP_FAULT:
default:
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
WARN_ON_ONCE(1);
}
}
@@ -841,7 +847,7 @@ void do_non_secure_storage_access(struct pt_regs *regs)
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
if (get_fault_type(regs) != GMAP_FAULT) {
- do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ do_fault_error(regs, VM_FAULT_BADMAP);
WARN_ON_ONCE(1);
return;
}
@@ -853,6 +859,16 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access);
void do_secure_storage_violation(struct pt_regs *regs)
{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ /*
+ * If the VM has been rebooted, its address space might still contain
+ * secure pages from the previous boot.
+ * Clear the page so it can be reused.
+ */
+ if (!gmap_destroy_page(gmap, gaddr))
+ return;
/*
* Either KVM messed up the secure guest mapping or the same
* page is mapped into multiple secure guests.
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index dfee0ebb2fac..02d15c8dc92e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -974,18 +974,18 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
return -EAGAIN;
if (prot == PROT_NONE && !pmd_i) {
- pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
+ new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
}
if (prot == PROT_READ && !pmd_p) {
- pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
- pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
+ new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
+ new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
}
if (bits & GMAP_NOTIFY_MPROT)
- pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
+ set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
/* Shadow GMAP protection needs split PMDs */
if (bits & GMAP_NOTIFY_SHADOW)
@@ -1151,7 +1151,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
address = pte_val(pte) & PAGE_MASK;
address += gaddr & ~PAGE_MASK;
*val = *(unsigned long *) address;
- pte_val(*ptep) |= _PAGE_YOUNG;
+ set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
/* Do *NOT* clear the _PAGE_INVALID bit! */
rc = 0;
}
@@ -1183,6 +1183,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table);
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
struct gmap_rmap *rmap)
{
+ struct gmap_rmap *temp;
void __rcu **slot;
BUG_ON(!gmap_is_shadow(sg));
@@ -1190,6 +1191,12 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
if (slot) {
rmap->next = radix_tree_deref_slot_protected(slot,
&sg->guest_table_lock);
+ for (temp = rmap->next; temp; temp = temp->next) {
+ if (temp->raddr == rmap->raddr) {
+ kfree(rmap);
+ return;
+ }
+ }
radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
} else {
rmap->next = NULL;
@@ -1278,7 +1285,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
asm volatile(
- " .insn rrf,0xb98e0000,%0,%1,0,0"
+ " idte %0,0,%1"
: : "a" (asce), "a" (vaddr) : "cc", "memory");
}
@@ -2275,7 +2282,7 @@ EXPORT_SYMBOL_GPL(ptep_notify);
static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
unsigned long gaddr)
{
- pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}
@@ -2294,7 +2301,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
{
gaddr &= HPAGE_MASK;
pmdp_notify_gmap(gmap, pmdp, gaddr);
- pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
IDTE_GLOBAL);
@@ -2302,7 +2309,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
- *pmdp = new;
+ set_pmd(pmdp, new);
}
static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
@@ -2324,7 +2331,7 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
_SEGMENT_ENTRY_GMAP_UC));
if (purge)
__pmdp_csp(pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
}
spin_unlock(&gmap->guest_table_lock);
}
@@ -2447,7 +2454,7 @@ static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
return false;
/* Clear UC indication and reset protection */
- pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
+ set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
return true;
}
@@ -2508,8 +2515,9 @@ static const struct mm_walk_ops thp_split_walk_ops = {
static inline void thp_split_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
walk_page_vma(vma, &thp_split_walk_ops, NULL);
@@ -2577,8 +2585,9 @@ int gmap_mark_unmergeable(void)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int ret;
+ VMA_ITERATOR(vmi, mm, 0);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vma->vm_flags);
if (ret)
@@ -2601,6 +2610,18 @@ static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
return 0;
}
+/*
+ * Give a chance to schedule after setting a key to 256 pages.
+ * We only hold the mm lock, which is a rwsem and the kvm srcu.
+ * Both can sleep.
+ */
+static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ cond_resched();
+ return 0;
+}
+
static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
unsigned long hmask, unsigned long next,
struct mm_walk *walk)
@@ -2623,12 +2644,14 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
end = start + HPAGE_SIZE - 1;
__storage_key_init_range(start, end);
set_bit(PG_arch_1, &page->flags);
+ cond_resched();
return 0;
}
static const struct mm_walk_ops enable_skey_walk_ops = {
.hugetlb_entry = __s390_enable_skey_hugetlb,
.pte_entry = __s390_enable_skey_pte,
+ .pmd_entry = __s390_enable_skey_pmd,
};
int s390_enable_skey(void)
@@ -2676,41 +2699,168 @@ void s390_reset_cmma(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+#define GATHER_GET_PAGES 32
+
+struct reset_walk_state {
+ unsigned long next;
+ unsigned long count;
+ unsigned long pfns[GATHER_GET_PAGES];
+};
+
+static int s390_gather_pages(pte_t *ptep, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct reset_walk_state *p = walk->private;
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte)) {
+ /* we have a reference from the mapping, take an extra one */
+ get_page(phys_to_page(pte_val(pte)));
+ p->pfns[p->count] = phys_to_pfn(pte_val(pte));
+ p->next = next;
+ p->count++;
+ }
+ return p->count >= GATHER_GET_PAGES;
+}
+
+static const struct mm_walk_ops gather_pages_ops = {
+ .pte_entry = s390_gather_pages,
+};
+
/*
- * make inaccessible pages accessible again
+ * Call the Destroy secure page UVC on each page in the given array of PFNs.
+ * Each page needs to have an extra reference, which will be released here.
*/
-static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
- pte_t pte = READ_ONCE(*ptep);
+ unsigned long i;
- /* There is a reference through the mapping */
- if (pte_present(pte))
- WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
+ for (i = 0; i < count; i++) {
+ /* we always have an extra reference */
+ uv_destroy_owned_page(pfn_to_phys(pfns[i]));
+ /* get rid of the extra reference */
+ put_page(pfn_to_page(pfns[i]));
+ cond_resched();
+ }
+}
+EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
+/**
+ * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
+ * in the given range of the given address space.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ * @interruptible: if not 0, stop when a fatal signal is received
+ *
+ * Walk the given range of the given address space and call the destroy
+ * secure page UVC on each page. Optionally exit early if a fatal signal is
+ * pending.
+ *
+ * Return: 0 on success, -EINTR if the function stopped before completing
+ */
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool interruptible)
+{
+ struct reset_walk_state state = { .next = start };
+ int r = 1;
+
+ while (r > 0) {
+ state.count = 0;
+ mmap_read_lock(mm);
+ r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
+ mmap_read_unlock(mm);
+ cond_resched();
+ s390_uv_destroy_pfns(state.count, state.pfns);
+ if (interruptible && fatal_signal_pending(current))
+ return -EINTR;
+ }
return 0;
}
+EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
-static const struct mm_walk_ops reset_acc_walk_ops = {
- .pte_entry = __s390_reset_acc,
-};
+/**
+ * s390_unlist_old_asce - Remove the topmost level of page tables from the
+ * list of page tables of the gmap.
+ * @gmap: the gmap whose table is to be removed
+ *
+ * On s390x, KVM keeps a list of all pages containing the page tables of the
+ * gmap (the CRST list). This list is used at tear down time to free all
+ * pages that are now not needed anymore.
+ *
+ * This function removes the topmost page of the tree (the one pointed to by
+ * the ASCE) from the CRST list.
+ *
+ * This means that it will not be freed when the VM is torn down, and needs
+ * to be handled separately by the caller, unless a leak is actually
+ * intended. Notice that this function will only remove the page from the
+ * list, the page will still be used as a top level page table (and ASCE).
+ */
+void s390_unlist_old_asce(struct gmap *gmap)
+{
+ struct page *old;
-#include <linux/sched/mm.h>
-void s390_reset_acc(struct mm_struct *mm)
+ old = virt_to_page(gmap->table);
+ spin_lock(&gmap->guest_table_lock);
+ list_del(&old->lru);
+ /*
+ * Sometimes the topmost page might need to be "removed" multiple
+ * times, for example if the VM is rebooted into secure mode several
+ * times concurrently, or if s390_replace_asce fails after calling
+ * s390_remove_old_asce and is attempted again later. In that case
+ * the old asce has been removed from the list, and therefore it
+ * will not be freed when the VM terminates, but the ASCE is still
+ * in use and still pointed to.
+ * A subsequent call to replace_asce will follow the pointer and try
+ * to remove the same page from the list again.
+ * Therefore it's necessary that the page of the ASCE has valid
+ * pointers, so list_del can work (and do nothing) without
+ * dereferencing stale or invalid pointers.
+ */
+ INIT_LIST_HEAD(&old->lru);
+ spin_unlock(&gmap->guest_table_lock);
+}
+EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
+
+/**
+ * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
+ * @gmap: the gmap whose ASCE needs to be replaced
+ *
+ * If the allocation of the new top level page table fails, the ASCE is not
+ * replaced.
+ * In any case, the old ASCE is always removed from the gmap CRST list.
+ * Therefore the caller has to make sure to save a pointer to it
+ * beforehand, unless a leak is actually intended.
+ */
+int s390_replace_asce(struct gmap *gmap)
{
- if (!mm_is_protected(mm))
- return;
+ unsigned long asce;
+ struct page *page;
+ void *table;
+
+ s390_unlist_old_asce(gmap);
+
+ page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+ if (!page)
+ return -ENOMEM;
+ table = page_to_virt(page);
+ memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
+
/*
- * we might be called during
- * reset: we walk the pages and clear
- * close of all kvm file descriptors: we walk the pages and clear
- * exit of process on fd closure: vma already gone, do nothing
+ * The caller has to deal with the old ASCE, but here we make sure
+ * the new one is properly added to the CRST list, so that
+ * it will be freed when the VM is torn down.
*/
- if (!mmget_not_zero(mm))
- return;
- mmap_read_lock(mm);
- walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
- mmap_read_unlock(mm);
- mmput(mm);
+ spin_lock(&gmap->guest_table_lock);
+ list_add(&page->lru, &gmap->crst_list);
+ spin_unlock(&gmap->guest_table_lock);
+
+ /* Set new table origin while preserving existing ASCE control bits */
+ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
+ WRITE_ONCE(gmap->asce, asce);
+ WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
+ WRITE_ONCE(gmap->table, table);
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(s390_reset_acc);
+EXPORT_SYMBOL_GPL(s390_replace_asce);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index da36d13ffc16..c299a18273ff 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -9,6 +9,7 @@
#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <asm/pgalloc.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
@@ -72,8 +73,8 @@ static inline unsigned long __pte_to_rste(pte_t pte)
static inline pte_t __rste_to_pte(unsigned long rste)
{
+ unsigned long pteval;
int present;
- pte_t pte;
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
present = pud_present(__pud(rste));
@@ -101,29 +102,21 @@ static inline pte_t __rste_to_pte(unsigned long rste)
* u unused, l large
*/
if (present) {
- pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
- pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ,
- _PAGE_READ);
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE,
- _PAGE_WRITE);
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID,
- _PAGE_INVALID);
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT,
- _PAGE_PROTECT);
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY,
- _PAGE_DIRTY);
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG,
- _PAGE_YOUNG);
+ pteval = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
+ pteval |= _PAGE_LARGE | _PAGE_PRESENT;
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_READ, _PAGE_READ);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, _PAGE_WRITE);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, _PAGE_INVALID);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, _PAGE_PROTECT);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, _PAGE_DIRTY);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, _PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
- _PAGE_SOFT_DIRTY);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY);
#endif
- pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
- _PAGE_NOEXEC);
+ pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC);
} else
- pte_val(pte) = _PAGE_INVALID;
- return pte;
+ pteval = _PAGE_INVALID;
+ return __pte(pteval);
}
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
@@ -167,7 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
rste |= _SEGMENT_ENTRY_LARGE;
clear_huge_pte_skeys(mm, rste);
- pte_val(*ptep) = rste;
+ set_pte(ptep, __pte(rste));
}
pte_t huge_ptep_get(pte_t *ptep)
@@ -244,16 +237,6 @@ int pud_huge(pud_t pud)
return pud_large(pud);
}
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int flags)
-{
- if (flags & FOLL_GET)
- return NULL;
-
- return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8c6f258a6183..97d66a3e60fb 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -37,7 +37,7 @@
#include <asm/kfence.h>
#include <asm/ptdump.h>
#include <asm/dma.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
@@ -47,6 +47,7 @@
#include <asm/kasan.h>
#include <asm/dma-mapping.h>
#include <asm/uv.h>
+#include <linux/virtio_anchor.h>
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
@@ -168,25 +169,16 @@ bool force_dma_unencrypted(struct device *dev)
return is_prot_virt_guest();
}
-#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
-
-int arch_has_restricted_virtio_memory_access(void)
-{
- return is_prot_virt_guest();
-}
-EXPORT_SYMBOL(arch_has_restricted_virtio_memory_access);
-
-#endif
-
/* protected virtualization */
static void pv_init(void)
{
if (!is_prot_virt_guest())
return;
+ virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
+
/* make sure bounce buffers are shared */
- swiotlb_force = SWIOTLB_FORCE;
- swiotlb_init(1);
+ swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
swiotlb_update_mem_attributes();
}
@@ -215,6 +207,9 @@ void free_initmem(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RW | SET_MEMORY_NX);
+ free_reserved_area(sclp_early_sccb,
+ sclp_early_sccb + EXT_SCCB_READ_SCP,
+ POISON_FREE_INITMEM, "unused early sccb");
free_initmem_default(POISON_FREE_INITMEM);
}
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 483b9dbe0970..9f988d4582ed 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -175,7 +175,7 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
page = kasan_early_alloc_segment();
memset(page, 0, _SEGMENT_SIZE);
}
- pmd_val(*pm_dir) = __pa(page) | sgt_prot;
+ set_pmd(pm_dir, __pmd(__pa(page) | sgt_prot));
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -194,16 +194,16 @@ static void __init kasan_early_pgtable_populate(unsigned long address,
switch (mode) {
case POPULATE_ONE2ONE:
page = (void *)address;
- pte_val(*pt_dir) = __pa(page) | pgt_prot;
+ set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
break;
case POPULATE_MAP:
page = kasan_early_alloc_pages(0);
memset(page, 0, PAGE_SIZE);
- pte_val(*pt_dir) = __pa(page) | pgt_prot;
+ set_pte(pt_dir, __pte(__pa(page) | pgt_prot));
break;
case POPULATE_ZERO_SHADOW:
page = kasan_early_shadow_page;
- pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
+ set_pte(pt_dir, __pte(__pa(page) | pgt_prot_zero));
break;
case POPULATE_SHALLOW:
/* should never happen */
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 9663ce3625bc..1571cdcb0c50 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -4,8 +4,6 @@
*
* Copyright IBM Corp. 2009, 2015
*
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
*/
#include <linux/uaccess.h>
@@ -14,9 +12,17 @@
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/cpu.h>
+#include <linux/uio.h>
+#include <asm/asm-extable.h>
#include <asm/ctl_reg.h>
#include <asm/io.h>
+#include <asm/abs_lowcore.h>
#include <asm/stacktrace.h>
+#include <asm/maccess.h>
+
+unsigned long __bootdata_preserved(__memcpy_real_area);
+static __ro_after_init pte_t *memcpy_real_ptep;
+static DEFINE_MUTEX(memcpy_real_mutex);
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
{
@@ -77,144 +83,72 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
return dst;
}
-static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
-{
- union register_pair _dst, _src;
- int rc = -EFAULT;
-
- _dst.even = (unsigned long) dest;
- _dst.odd = (unsigned long) count;
- _src.even = (unsigned long) src;
- _src.odd = (unsigned long) count;
- asm volatile (
- "0: mvcle %[dst],%[src],0\n"
- "1: jo 0b\n"
- " lhi %[rc],0\n"
- "2:\n"
- EX_TABLE(1b,2b)
- : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair)
- : : "cc", "memory");
- return rc;
-}
-
-static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
- unsigned long src,
- unsigned long count)
+void __init memcpy_real_init(void)
{
- int irqs_disabled, rc;
- unsigned long flags;
-
- if (!count)
- return 0;
- flags = arch_local_irq_save();
- irqs_disabled = arch_irqs_disabled_flags(flags);
- if (!irqs_disabled)
- trace_hardirqs_off();
- __arch_local_irq_stnsm(0xf8); // disable DAT
- rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
- if (flags & PSW_MASK_DAT)
- __arch_local_irq_stosm(0x04); // enable DAT
- if (!irqs_disabled)
- trace_hardirqs_on();
- __arch_local_irq_ssm(flags);
- return rc;
+ memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
+ if (!memcpy_real_ptep)
+ panic("Couldn't setup memcpy real area");
}
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, void *src, size_t count)
-{
- unsigned long _dest = (unsigned long)dest;
- unsigned long _src = (unsigned long)src;
- unsigned long _count = (unsigned long)count;
- int rc;
-
- if (S390_lowcore.nodat_stack != 0) {
- preempt_disable();
- rc = call_on_stack(3, S390_lowcore.nodat_stack,
- unsigned long, _memcpy_real,
- unsigned long, _dest,
- unsigned long, _src,
- unsigned long, _count);
- preempt_enable();
- return rc;
- }
- /*
- * This is a really early memcpy_real call, the stacks are
- * not set up yet. Just call _memcpy_real on the early boot
- * stack
- */
- return _memcpy_real(_dest, _src, _count);
-}
-
-/*
- * Copy memory in absolute mode (kernel to kernel)
- */
-void memcpy_absolute(void *dest, void *src, size_t count)
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long cr0, flags, prefix;
-
- flags = arch_local_irq_save();
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28); /* disable lowcore protection */
- prefix = store_prefix();
- if (prefix) {
- local_mcck_disable();
- set_prefix(0);
- memcpy(dest, src, count);
- set_prefix(prefix);
- local_mcck_enable();
- } else {
- memcpy(dest, src, count);
+ size_t len, copied, res = 0;
+ unsigned long phys, offset;
+ void *chunk;
+ pte_t pte;
+
+ while (count) {
+ phys = src & PAGE_MASK;
+ offset = src & ~PAGE_MASK;
+ chunk = (void *)(__memcpy_real_area + offset);
+ len = min(count, PAGE_SIZE - offset);
+ pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
+
+ mutex_lock(&memcpy_real_mutex);
+ if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
+ __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
+ set_pte(memcpy_real_ptep, pte);
+ }
+ copied = copy_to_iter(chunk, len, iter);
+ mutex_unlock(&memcpy_real_mutex);
+
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- __ctl_load(cr0, 0, 0);
- arch_local_irq_restore(flags);
+ return res;
}
-/*
- * Copy memory from kernel (real) to user (virtual)
- */
-int copy_to_user_real(void __user *dest, void *src, unsigned long count)
+int memcpy_real(void *dest, unsigned long src, size_t count)
{
- int offs = 0, size, rc;
- char *buf;
-
- buf = (char *) __get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = -EFAULT;
- while (offs < count) {
- size = min(PAGE_SIZE, count - offs);
- if (memcpy_real(buf, src + offs, size))
- goto out;
- if (copy_to_user(dest + offs, buf, size))
- goto out;
- offs += size;
- }
- rc = 0;
-out:
- free_page((unsigned long) buf);
- return rc;
+ struct iov_iter iter;
+ struct kvec kvec;
+
+ kvec.iov_base = dest;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (memcpy_real_iter(&iter, src, count) < count)
+ return -EFAULT;
+ return 0;
}
/*
- * Check if physical address is within prefix or zero page
+ * Find CPU that owns swapped prefix page
*/
-static int is_swapped(unsigned long addr)
+static int get_swapped_owner(phys_addr_t addr)
{
- unsigned long lc;
+ phys_addr_t lc;
int cpu;
- if (addr < sizeof(struct lowcore))
- return 1;
for_each_online_cpu(cpu) {
- lc = (unsigned long) lowcore_ptr[cpu];
+ lc = virt_to_phys(lowcore_ptr[cpu]);
if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
continue;
- return 1;
+ return cpu;
}
- return 0;
+ return -1;
}
/*
@@ -225,18 +159,37 @@ static int is_swapped(unsigned long addr)
*/
void *xlate_dev_mem_ptr(phys_addr_t addr)
{
- void *bounce = (void *) addr;
+ void *ptr = phys_to_virt(addr);
+ void *bounce = ptr;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned long size;
+ int this_cpu, cpu;
cpus_read_lock();
- preempt_disable();
- if (is_swapped(addr)) {
- size = PAGE_SIZE - (addr & ~PAGE_MASK);
- bounce = (void *) __get_free_page(GFP_ATOMIC);
- if (bounce)
- memcpy_absolute(bounce, (void *) addr, size);
+ this_cpu = get_cpu();
+ if (addr >= sizeof(struct lowcore)) {
+ cpu = get_swapped_owner(addr);
+ if (cpu < 0)
+ goto out;
}
- preempt_enable();
+ bounce = (void *)__get_free_page(GFP_ATOMIC);
+ if (!bounce)
+ goto out;
+ size = PAGE_SIZE - (addr & ~PAGE_MASK);
+ if (addr < sizeof(struct lowcore)) {
+ abs_lc = get_abs_lowcore(&flags);
+ ptr = (void *)abs_lc + addr;
+ memcpy(bounce, ptr, size);
+ put_abs_lowcore(abs_lc, flags);
+ } else if (cpu == this_cpu) {
+ ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
+ memcpy(bounce, ptr, size);
+ } else {
+ memcpy(bounce, ptr, size);
+ }
+out:
+ put_cpu();
cpus_read_unlock();
return bounce;
}
@@ -244,8 +197,8 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
/*
* Free converted buffer for /dev/mem access (if necessary)
*/
-void unxlate_dev_mem_ptr(phys_addr_t addr, void *buf)
+void unxlate_dev_mem_ptr(phys_addr_t addr, void *ptr)
{
- if ((void *) addr != buf)
- free_page((unsigned long) buf);
+ if (addr != virt_to_phys(ptr))
+ free_page((unsigned long)ptr);
}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index e54f928503c5..3327c47bc181 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,7 +37,7 @@ static inline int mmap_is_legacy(struct rlimit *rlim_stack)
unsigned long arch_mmap_rnd(void)
{
- return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -58,9 +58,9 @@ static inline unsigned long mmap_base(unsigned long rnd,
/*
* Top of mmap area (just below the process stack).
- * Leave at least a ~32 MB hole.
+ * Leave at least a ~128 MB hole.
*/
- gap_min = 32 * 1024 * 1024UL;
+ gap_min = SZ_128M;
gap_max = (STACK_TOP / 6) * 5;
if (gap < gap_min)
@@ -188,3 +188,23 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
+
+static const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_NONE,
+ [VM_READ] = PAGE_RO,
+ [VM_WRITE] = PAGE_RO,
+ [VM_WRITE | VM_READ] = PAGE_RO,
+ [VM_EXEC] = PAGE_RX,
+ [VM_EXEC | VM_READ] = PAGE_RX,
+ [VM_EXEC | VM_WRITE] = PAGE_RX,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX,
+ [VM_SHARED] = PAGE_NONE,
+ [VM_SHARED | VM_READ] = PAGE_RO,
+ [VM_SHARED | VM_WRITE] = PAGE_RW,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW,
+ [VM_SHARED | VM_EXEC] = PAGE_RX,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX
+};
+DECLARE_VM_GET_PAGE_PROT
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 18a6381097a9..d5ea09d78938 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/init.h>
+#include <asm/asm-extable.h>
#include <asm/facility.h>
#include <asm/page-states.h>
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 654019181a37..85195c18b2e8 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -98,9 +98,9 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
else if (flags & SET_MEMORY_RW)
new = pte_mkwrite(pte_mkdirty(new));
if (flags & SET_MEMORY_NX)
- pte_val(new) |= _PAGE_NOEXEC;
+ new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
else if (flags & SET_MEMORY_X)
- pte_val(new) &= ~_PAGE_NOEXEC;
+ new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
ptep++;
addr += PAGE_SIZE;
@@ -127,11 +127,11 @@ static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
prot &= ~_PAGE_NOEXEC;
ptep = pt_dir;
for (i = 0; i < PTRS_PER_PTE; i++) {
- pte_val(*ptep) = pte_addr | prot;
+ set_pte(ptep, __pte(pte_addr | prot));
pte_addr += PAGE_SIZE;
ptep++;
}
- pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY;
+ new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
update_page_count(PG_DIRECT_MAP_1M, -1);
@@ -148,9 +148,9 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
else if (flags & SET_MEMORY_RW)
new = pmd_mkwrite(pmd_mkdirty(new));
if (flags & SET_MEMORY_NX)
- pmd_val(new) |= _SEGMENT_ENTRY_NOEXEC;
+ new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
- pmd_val(new) &= ~_SEGMENT_ENTRY_NOEXEC;
+ new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}
@@ -208,11 +208,11 @@ static int split_pud_page(pud_t *pudp, unsigned long addr)
prot &= ~_SEGMENT_ENTRY_NOEXEC;
pmdp = pm_dir;
for (i = 0; i < PTRS_PER_PMD; i++) {
- pmd_val(*pmdp) = pmd_addr | prot;
+ set_pmd(pmdp, __pmd(pmd_addr | prot));
pmd_addr += PMD_SIZE;
pmdp++;
}
- pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY;
+ new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
update_page_count(PG_DIRECT_MAP_2G, -1);
@@ -229,9 +229,9 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr,
else if (flags & SET_MEMORY_RW)
new = pud_mkwrite(pud_mkdirty(new));
if (flags & SET_MEMORY_NX)
- pud_val(new) |= _REGION_ENTRY_NOEXEC;
+ new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
- pud_val(new) &= ~_REGION_ENTRY_NOEXEC;
+ new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
@@ -347,23 +347,24 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long address;
+ pte_t *ptep, pte;
int nr, i, j;
- pte_t *pte;
for (i = 0; i < numpages;) {
address = (unsigned long)page_to_virt(page + i);
- pte = virt_to_kpte(address);
- nr = (unsigned long)pte >> ilog2(sizeof(long));
+ ptep = virt_to_kpte(address);
+ nr = (unsigned long)ptep >> ilog2(sizeof(long));
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
nr = min(numpages - i, nr);
if (enable) {
for (j = 0; j < nr; j++) {
- pte_val(*pte) &= ~_PAGE_INVALID;
+ pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
+ set_pte(ptep, pte);
address += PAGE_SIZE;
- pte++;
+ ptep++;
}
} else {
- ipte_range(pte, address, nr);
+ ipte_range(ptep, address, nr);
}
i += nr;
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 781965f7210e..2de48b2c1b04 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -53,17 +53,17 @@ __initcall(page_table_register_sysctl);
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_pages(GFP_KERNEL, 2);
+ struct page *page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (!page)
return NULL;
- arch_set_page_dat(page, 2);
+ arch_set_page_dat(page, CRST_ALLOC_ORDER);
return (unsigned long *) page_to_virt(page);
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- free_pages((unsigned long) table, 2);
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
}
static void __crst_table_upgrade(void *arg)
@@ -176,7 +176,75 @@ void page_table_free_pgste(struct page *page)
#endif /* CONFIG_PGSTE */
/*
- * page table entry allocation/free routines.
+ * A 2KB-pgtable is either upper or lower half of a normal page.
+ * The second half of the page may be unused or used as another
+ * 2KB-pgtable.
+ *
+ * Whenever possible the parent page for a new 2KB-pgtable is picked
+ * from the list of partially allocated pages mm_context_t::pgtable_list.
+ * In case the list is empty a new parent page is allocated and added to
+ * the list.
+ *
+ * When a parent page gets fully allocated it contains 2KB-pgtables in both
+ * upper and lower halves and is removed from mm_context_t::pgtable_list.
+ *
+ * When 2KB-pgtable is freed from to fully allocated parent page that
+ * page turns partially allocated and added to mm_context_t::pgtable_list.
+ *
+ * If 2KB-pgtable is freed from the partially allocated parent page that
+ * page turns unused and gets removed from mm_context_t::pgtable_list.
+ * Furthermore, the unused parent page is released.
+ *
+ * As follows from the above, no unallocated or fully allocated parent
+ * pages are contained in mm_context_t::pgtable_list.
+ *
+ * The upper byte (bits 24-31) of the parent page _refcount is used
+ * for tracking contained 2KB-pgtables and has the following format:
+ *
+ * PP AA
+ * 01234567 upper byte (bits 24-31) of struct page::_refcount
+ * || ||
+ * || |+--- upper 2KB-pgtable is allocated
+ * || +---- lower 2KB-pgtable is allocated
+ * |+------- upper 2KB-pgtable is pending for removal
+ * +-------- lower 2KB-pgtable is pending for removal
+ *
+ * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
+ * using _refcount is possible).
+ *
+ * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
+ * The parent page is either:
+ * - added to mm_context_t::pgtable_list in case the second half of the
+ * parent page is still unallocated;
+ * - removed from mm_context_t::pgtable_list in case both hales of the
+ * parent page are allocated;
+ * These operations are protected with mm_context_t::lock.
+ *
+ * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
+ * and the corresponding PP bit is set to 1 in a single atomic operation.
+ * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
+ * exclusive and may never be both set to 1!
+ * The parent page is either:
+ * - added to mm_context_t::pgtable_list in case the second half of the
+ * parent page is still allocated;
+ * - removed from mm_context_t::pgtable_list in case the second half of
+ * the parent page is unallocated;
+ * These operations are protected with mm_context_t::lock.
+ *
+ * It is important to understand that mm_context_t::lock only protects
+ * mm_context_t::pgtable_list and AA bits, but not the parent page itself
+ * and PP bits.
+ *
+ * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
+ * while both AA bits and the second PP bit are already unset. Then the
+ * parent page does not contain any 2KB-pgtable fragment anymore, and it has
+ * also been removed from mm_context_t::pgtable_list. It is safe to release
+ * the page therefore.
+ *
+ * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
+ * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
+ * while the PP bits are never used, nor such a page is added to or removed
+ * from mm_context_t::pgtable_list.
*/
unsigned long *page_table_alloc(struct mm_struct *mm)
{
@@ -192,14 +260,23 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
page = list_first_entry(&mm->context.pgtable_list,
struct page, lru);
mask = atomic_read(&page->_refcount) >> 24;
- mask = (mask | (mask >> 4)) & 3;
- if (mask != 3) {
+ /*
+ * The pending removal bits must also be checked.
+ * Failure to do so might lead to an impossible
+ * value of (i.e 0x13 or 0x23) written to _refcount.
+ * Such values violate the assumption that pending and
+ * allocation bits are mutually exclusive, and the rest
+ * of the code unrails as result. That could lead to
+ * a whole bunch of races and corruptions.
+ */
+ mask = (mask | (mask >> 4)) & 0x03U;
+ if (mask != 0x03U) {
table = (unsigned long *) page_to_virt(page);
bit = mask & 1; /* =1 -> second 2K */
if (bit)
table += PTRS_PER_PTE;
atomic_xor_bits(&page->_refcount,
- 1U << (bit + 24));
+ 0x01U << (bit + 24));
list_del(&page->lru);
}
}
@@ -220,12 +297,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
table = (unsigned long *) page_to_virt(page);
if (mm_alloc_pgste(mm)) {
/* Return 4K page table with PGSTEs */
- atomic_xor_bits(&page->_refcount, 3 << 24);
+ atomic_xor_bits(&page->_refcount, 0x03U << 24);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
} else {
/* Return the first 2K fragment of the page */
- atomic_xor_bits(&page->_refcount, 1 << 24);
+ atomic_xor_bits(&page->_refcount, 0x01U << 24);
memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
spin_lock_bh(&mm->context.lock);
list_add(&page->lru, &mm->context.pgtable_list);
@@ -234,29 +311,53 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
return table;
}
+static void page_table_release_check(struct page *page, void *table,
+ unsigned int half, unsigned int mask)
+{
+ char msg[128];
+
+ if (!IS_ENABLED(CONFIG_DEBUG_VM) || !mask)
+ return;
+ snprintf(msg, sizeof(msg),
+ "Invalid pgtable %p release half 0x%02x mask 0x%02x",
+ table, half, mask);
+ dump_page(page, msg);
+}
+
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
+ unsigned int mask, bit, half;
struct page *page;
- unsigned int bit, mask;
page = virt_to_page(table);
if (!mm_alloc_pgste(mm)) {
/* Free 2K page table fragment of a 4K page */
bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+ /*
+ * Mark the page for delayed release. The actual release
+ * will happen outside of the critical section from this
+ * function or from __tlb_remove_table()
+ */
+ mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
mask >>= 24;
- if (mask & 3)
+ if (mask & 0x03U)
list_add(&page->lru, &mm->context.pgtable_list);
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.lock);
- if (mask != 0)
+ mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+ mask >>= 24;
+ if (mask != 0x00U)
return;
+ half = 0x01U << bit;
} else {
- atomic_xor_bits(&page->_refcount, 3U << 24);
+ half = 0x03U;
+ mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
+ mask >>= 24;
}
+ page_table_release_check(page, table, half, mask);
pgtable_pte_page_dtor(page);
__free_page(page);
}
@@ -272,47 +373,54 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
page = virt_to_page(table);
if (mm_alloc_pgste(mm)) {
gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) ((unsigned long)table | 3);
+ table = (unsigned long *) ((unsigned long)table | 0x03U);
tlb_remove_table(tlb, table);
return;
}
bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
+ /*
+ * Mark the page for delayed release. The actual release will happen
+ * outside of the critical section from __tlb_remove_table() or from
+ * page_table_free()
+ */
mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
mask >>= 24;
- if (mask & 3)
+ if (mask & 0x03U)
list_add_tail(&page->lru, &mm->context.pgtable_list);
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.lock);
- table = (unsigned long *) ((unsigned long) table | (1U << bit));
+ table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
tlb_remove_table(tlb, table);
}
void __tlb_remove_table(void *_table)
{
- unsigned int mask = (unsigned long) _table & 3;
+ unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
void *table = (void *)((unsigned long) _table ^ mask);
struct page *page = virt_to_page(table);
- switch (mask) {
- case 0: /* pmd, pud, or p4d */
- free_pages((unsigned long) table, 2);
- break;
- case 1: /* lower 2K of a 4K page table */
- case 2: /* higher 2K of a 4K page table */
+ switch (half) {
+ case 0x00U: /* pmd, pud, or p4d */
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+ return;
+ case 0x01U: /* lower 2K of a 4K page table */
+ case 0x02U: /* higher 2K of a 4K page table */
mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
mask >>= 24;
- if (mask != 0)
- break;
- fallthrough;
- case 3: /* 4K page table with pgstes */
- if (mask & 3)
- atomic_xor_bits(&page->_refcount, 3 << 24);
- pgtable_pte_page_dtor(page);
- __free_page(page);
+ if (mask != 0x00U)
+ return;
+ break;
+ case 0x03U: /* 4K page table with pgstes */
+ mask = atomic_xor_bits(&page->_refcount, 0x03U << 24);
+ mask >>= 24;
break;
}
+
+ page_table_release_check(page, table, half, mask);
+ pgtable_pte_page_dtor(page);
+ __free_page(page);
}
/*
@@ -322,34 +430,34 @@ void __tlb_remove_table(void *_table)
static struct kmem_cache *base_pgt_cache;
-static unsigned long base_pgt_alloc(void)
+static unsigned long *base_pgt_alloc(void)
{
- u64 *table;
+ unsigned long *table;
table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
if (table)
- memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
- return (unsigned long) table;
+ memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+ return table;
}
-static void base_pgt_free(unsigned long table)
+static void base_pgt_free(unsigned long *table)
{
- kmem_cache_free(base_pgt_cache, (void *) table);
+ kmem_cache_free(base_pgt_cache, table);
}
-static unsigned long base_crst_alloc(unsigned long val)
+static unsigned long *base_crst_alloc(unsigned long val)
{
- unsigned long table;
+ unsigned long *table;
- table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ table = (unsigned long *)__get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
if (table)
- crst_table_init((unsigned long *)table, val);
+ crst_table_init(table, val);
return table;
}
-static void base_crst_free(unsigned long table)
+static void base_crst_free(unsigned long *table)
{
- free_pages(table, CRST_ALLOC_ORDER);
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
}
#define BASE_ADDR_END_FUNC(NAME, SIZE) \
@@ -377,14 +485,14 @@ static inline unsigned long base_lra(unsigned long address)
return real;
}
-static int base_page_walk(unsigned long origin, unsigned long addr,
+static int base_page_walk(unsigned long *origin, unsigned long addr,
unsigned long end, int alloc)
{
unsigned long *pte, next;
if (!alloc)
return 0;
- pte = (unsigned long *) origin;
+ pte = origin;
pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
do {
next = base_page_addr_end(addr, end);
@@ -393,13 +501,13 @@ static int base_page_walk(unsigned long origin, unsigned long addr,
return 0;
}
-static int base_segment_walk(unsigned long origin, unsigned long addr,
+static int base_segment_walk(unsigned long *origin, unsigned long addr,
unsigned long end, int alloc)
{
- unsigned long *ste, next, table;
+ unsigned long *ste, next, *table;
int rc;
- ste = (unsigned long *) origin;
+ ste = origin;
ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
do {
next = base_segment_addr_end(addr, end);
@@ -409,9 +517,9 @@ static int base_segment_walk(unsigned long origin, unsigned long addr,
table = base_pgt_alloc();
if (!table)
return -ENOMEM;
- *ste = table | _SEGMENT_ENTRY;
+ *ste = __pa(table) | _SEGMENT_ENTRY;
}
- table = *ste & _SEGMENT_ENTRY_ORIGIN;
+ table = __va(*ste & _SEGMENT_ENTRY_ORIGIN);
rc = base_page_walk(table, addr, next, alloc);
if (rc)
return rc;
@@ -422,13 +530,13 @@ static int base_segment_walk(unsigned long origin, unsigned long addr,
return 0;
}
-static int base_region3_walk(unsigned long origin, unsigned long addr,
+static int base_region3_walk(unsigned long *origin, unsigned long addr,
unsigned long end, int alloc)
{
- unsigned long *rtte, next, table;
+ unsigned long *rtte, next, *table;
int rc;
- rtte = (unsigned long *) origin;
+ rtte = origin;
rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
do {
next = base_region3_addr_end(addr, end);
@@ -438,9 +546,9 @@ static int base_region3_walk(unsigned long origin, unsigned long addr,
table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!table)
return -ENOMEM;
- *rtte = table | _REGION3_ENTRY;
+ *rtte = __pa(table) | _REGION3_ENTRY;
}
- table = *rtte & _REGION_ENTRY_ORIGIN;
+ table = __va(*rtte & _REGION_ENTRY_ORIGIN);
rc = base_segment_walk(table, addr, next, alloc);
if (rc)
return rc;
@@ -450,13 +558,13 @@ static int base_region3_walk(unsigned long origin, unsigned long addr,
return 0;
}
-static int base_region2_walk(unsigned long origin, unsigned long addr,
+static int base_region2_walk(unsigned long *origin, unsigned long addr,
unsigned long end, int alloc)
{
- unsigned long *rste, next, table;
+ unsigned long *rste, next, *table;
int rc;
- rste = (unsigned long *) origin;
+ rste = origin;
rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
do {
next = base_region2_addr_end(addr, end);
@@ -466,9 +574,9 @@ static int base_region2_walk(unsigned long origin, unsigned long addr,
table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!table)
return -ENOMEM;
- *rste = table | _REGION2_ENTRY;
+ *rste = __pa(table) | _REGION2_ENTRY;
}
- table = *rste & _REGION_ENTRY_ORIGIN;
+ table = __va(*rste & _REGION_ENTRY_ORIGIN);
rc = base_region3_walk(table, addr, next, alloc);
if (rc)
return rc;
@@ -478,13 +586,13 @@ static int base_region2_walk(unsigned long origin, unsigned long addr,
return 0;
}
-static int base_region1_walk(unsigned long origin, unsigned long addr,
+static int base_region1_walk(unsigned long *origin, unsigned long addr,
unsigned long end, int alloc)
{
- unsigned long *rfte, next, table;
+ unsigned long *rfte, next, *table;
int rc;
- rfte = (unsigned long *) origin;
+ rfte = origin;
rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
do {
next = base_region1_addr_end(addr, end);
@@ -494,9 +602,9 @@ static int base_region1_walk(unsigned long origin, unsigned long addr,
table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!table)
return -ENOMEM;
- *rfte = table | _REGION1_ENTRY;
+ *rfte = __pa(table) | _REGION1_ENTRY;
}
- table = *rfte & _REGION_ENTRY_ORIGIN;
+ table = __va(*rfte & _REGION_ENTRY_ORIGIN);
rc = base_region2_walk(table, addr, next, alloc);
if (rc)
return rc;
@@ -515,7 +623,7 @@ static int base_region1_walk(unsigned long origin, unsigned long addr,
*/
void base_asce_free(unsigned long asce)
{
- unsigned long table = asce & _ASCE_ORIGIN;
+ unsigned long *table = __va(asce & _ASCE_ORIGIN);
if (!asce)
return;
@@ -567,7 +675,7 @@ static int base_pgt_cache_init(void)
*/
unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
{
- unsigned long asce, table, end;
+ unsigned long asce, *table, end;
int rc;
if (base_pgt_cache_init())
@@ -578,25 +686,25 @@ unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
if (!table)
return 0;
rc = base_segment_walk(table, addr, end, 1);
- asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+ asce = __pa(table) | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
} else if (end <= _REGION2_SIZE) {
table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
if (!table)
return 0;
rc = base_region3_walk(table, addr, end, 1);
- asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ asce = __pa(table) | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
} else if (end <= _REGION1_SIZE) {
table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
if (!table)
return 0;
rc = base_region2_walk(table, addr, end, 1);
- asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ asce = __pa(table) | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
} else {
table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
if (!table)
return 0;
rc = base_region1_walk(table, addr, end, 1);
- asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+ asce = __pa(table) | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
}
if (rc) {
base_asce_free(asce);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index c16232cd0ec5..4909dcd762e8 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -115,7 +115,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(&mm->context.cpu_attach_mask,
cpumask_of(smp_processor_id()))) {
- pte_val(*ptep) |= _PAGE_INVALID;
+ set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_INVALID)));
mm->context.flush_mm = 1;
} else
ptep_ipte_global(mm, addr, ptep, nodat);
@@ -224,15 +224,15 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
*/
- pte_val(entry) |= _PAGE_DIRTY;
- pte_val(entry) &= ~_PAGE_PROTECT;
+ entry = set_pte_bit(entry, __pgprot(_PAGE_DIRTY));
+ entry = clear_pte_bit(entry, __pgprot(_PAGE_PROTECT));
}
if (!(pte_val(entry) & _PAGE_PROTECT))
/* This pte allows write access, set user-dirty */
pgste_val(pgste) |= PGSTE_UC_BIT;
}
#endif
- *ptep = entry;
+ set_pte(ptep, entry);
return pgste;
}
@@ -275,12 +275,12 @@ static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
pgste = pgste_update_all(old, pgste, mm);
if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
_PGSTE_GPS_USAGE_UNUSED)
- pte_val(old) |= _PAGE_UNUSED;
+ old = set_pte_bit(old, __pgprot(_PAGE_UNUSED));
}
pgste = pgste_set_pte(ptep, pgste, new);
pgste_set_unlock(ptep, pgste);
} else {
- *ptep = new;
+ set_pte(ptep, new);
}
return old;
}
@@ -345,14 +345,14 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
struct mm_struct *mm = vma->vm_mm;
if (!MACHINE_HAS_NX)
- pte_val(pte) &= ~_PAGE_NOEXEC;
+ pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC));
if (mm_has_pgste(mm)) {
pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte, mm);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else {
- *ptep = pte;
+ set_pte(ptep, pte);
}
preempt_enable();
}
@@ -417,7 +417,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(&mm->context.cpu_attach_mask,
cpumask_of(smp_processor_id()))) {
- pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_INVALID)));
mm->context.flush_mm = 1;
if (mm_has_pgste(mm))
gmap_pmdp_invalidate(mm, addr);
@@ -469,7 +469,7 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
preempt_disable();
old = pmdp_flush_direct(mm, addr, pmdp);
- *pmdp = new;
+ set_pmd(pmdp, new);
preempt_enable();
return old;
}
@@ -482,7 +482,7 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
preempt_disable();
old = pmdp_flush_lazy(mm, addr, pmdp);
- *pmdp = new;
+ set_pmd(pmdp, new);
preempt_enable();
return old;
}
@@ -539,7 +539,7 @@ pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
preempt_disable();
old = pudp_flush_direct(mm, addr, pudp);
- *pudp = new;
+ set_pud(pudp, new);
preempt_enable();
return old;
}
@@ -579,9 +579,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
list_del(lh);
}
ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_INVALID;
+ set_pte(ptep, __pte(_PAGE_INVALID));
ptep++;
- pte_val(*ptep) = _PAGE_INVALID;
+ set_pte(ptep, __pte(_PAGE_INVALID));
return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -646,12 +646,12 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
if (prot == PROT_NONE && !pte_i) {
ptep_flush_direct(mm, addr, ptep, nodat);
pgste = pgste_update_all(entry, pgste, mm);
- pte_val(entry) |= _PAGE_INVALID;
+ entry = set_pte_bit(entry, __pgprot(_PAGE_INVALID));
}
if (prot == PROT_READ && !pte_p) {
ptep_flush_direct(mm, addr, ptep, nodat);
- pte_val(entry) &= ~_PAGE_INVALID;
- pte_val(entry) |= _PAGE_PROTECT;
+ entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
+ entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
}
pgste_val(pgste) |= bit;
pgste = pgste_set_pte(ptep, pgste, entry);
@@ -675,8 +675,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
!(pte_val(pte) & _PAGE_PROTECT))) {
pgste_val(spgste) |= PGSTE_VSIE_BIT;
tpgste = pgste_get_lock(tptep);
- pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
- (pte_val(pte) & _PAGE_PROTECT);
+ tpte = __pte((pte_val(spte) & PAGE_MASK) |
+ (pte_val(pte) & _PAGE_PROTECT));
/* don't touch the storage key - it belongs to parent pgste */
tpgste = pgste_set_pte(tptep, tpgste, tpte);
pgste_set_unlock(tptep, tpgste);
@@ -748,7 +748,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
ptev = pte_val(*ptep);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@@ -773,10 +773,10 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
ptep_ipte_global(mm, addr, ptep, nodat);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
- pte_val(pte) |= _PAGE_PROTECT;
+ pte = set_pte_bit(pte, __pgprot(_PAGE_PROTECT));
else
- pte_val(pte) |= _PAGE_INVALID;
- *ptep = pte;
+ pte = set_pte_bit(pte, __pgprot(_PAGE_INVALID));
+ set_pte(ptep, pte);
}
pgste_set_unlock(ptep, pgste);
return dirty;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 7d9705eeb02f..ee1a97078527 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2006
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#include <linux/memory_hotplug.h>
@@ -175,9 +174,9 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
if (!new_page)
goto out;
- pte_val(*pte) = __pa(new_page) | prot;
+ set_pte(pte, __pte(__pa(new_page) | prot));
} else {
- pte_val(*pte) = __pa(addr) | prot;
+ set_pte(pte, __pte(__pa(addr) | prot));
}
} else {
continue;
@@ -241,9 +240,9 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
} else if (pmd_none(*pmd)) {
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE) &&
- MACHINE_HAS_EDAT1 && addr && direct &&
+ MACHINE_HAS_EDAT1 && direct &&
!debug_pagealloc_enabled()) {
- pmd_val(*pmd) = __pa(addr) | prot;
+ set_pmd(pmd, __pmd(__pa(addr) | prot));
pages++;
continue;
} else if (!direct && MACHINE_HAS_EDAT1) {
@@ -258,7 +257,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
*/
new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
if (new_page) {
- pmd_val(*pmd) = __pa(new_page) | prot;
+ set_pmd(pmd, __pmd(__pa(new_page) | prot));
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE)) {
vmemmap_use_new_sub_pmd(addr, next);
@@ -337,9 +336,9 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
} else if (pud_none(*pud)) {
if (IS_ALIGNED(addr, PUD_SIZE) &&
IS_ALIGNED(next, PUD_SIZE) &&
- MACHINE_HAS_EDAT2 && addr && direct &&
+ MACHINE_HAS_EDAT2 && direct &&
!debug_pagealloc_enabled()) {
- pud_val(*pud) = __pa(addr) | prot;
+ set_pud(pud, __pud(__pa(addr) | prot));
pages++;
continue;
}
@@ -562,6 +561,103 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
}
/*
+ * Allocate new or return existing page-table entry, but do not map it
+ * to any physical address. If missing, allocate segment- and region-
+ * table entries along. Meeting a large segment- or region-table entry
+ * while traversing is an error, since the function is expected to be
+ * called against virtual regions reserverd for 4KB mappings only.
+ */
+pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+{
+ pte_t *ptep = NULL;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd)) {
+ if (!alloc)
+ goto out;
+ p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4d)
+ goto out;
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ if (!alloc)
+ goto out;
+ pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pud)
+ goto out;
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud)) {
+ if (!alloc)
+ goto out;
+ pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pmd)
+ goto out;
+ pud_populate(&init_mm, pud, pmd);
+ } else if (WARN_ON_ONCE(pud_large(*pud))) {
+ goto out;
+ }
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd)) {
+ if (!alloc)
+ goto out;
+ pte = vmem_pte_alloc();
+ if (!pte)
+ goto out;
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+ goto out;
+ }
+ ptep = pte_offset_kernel(pmd, addr);
+out:
+ return ptep;
+}
+
+int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
+{
+ pte_t *ptep, pte;
+
+ if (!IS_ALIGNED(addr, PAGE_SIZE))
+ return -EINVAL;
+ ptep = vmem_get_alloc_pte(addr, alloc);
+ if (!ptep)
+ return -ENOMEM;
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ pte = mk_pte_phys(phys, prot);
+ set_pte(ptep, pte);
+ return 0;
+}
+
+int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+ int rc;
+
+ mutex_lock(&vmem_mutex);
+ rc = __vmem_map_4k_page(addr, phys, prot, true);
+ mutex_unlock(&vmem_mutex);
+ return rc;
+}
+
+void vmem_unmap_4k_page(unsigned long addr)
+{
+ pte_t *ptep;
+
+ mutex_lock(&vmem_mutex);
+ ptep = virt_to_kpte(addr);
+ __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+ pte_clear(&init_mm, addr, ptep);
+ mutex_unlock(&vmem_mutex);
+}
+
+/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
* additional memory segments.
@@ -585,13 +681,12 @@ void __init vmem_map_init(void)
__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) {
- /*
- * Lowcore must be executable for LPSWE
- * and expoline trampoline branch instructions.
- */
+ /* lowcore requires 4k mapping for real addresses / prefixing */
+ set_memory_4k(0, LC_PAGES);
+
+ /* lowcore must be executable for LPSWE */
+ if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
- }
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 233cc9bcd652..af35052d06ed 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -7,7 +7,6 @@
* - HAVE_MARCH_Z196_FEATURES: laal, laalg
* - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
* - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
- * - PACK_STACK
* - 64BIT
*
* Copyright IBM Corp. 2012,2015
@@ -26,6 +25,7 @@
#include <linux/mm.h>
#include <linux/kernel.h>
#include <asm/cacheflush.h>
+#include <asm/extable.h>
#include <asm/dis.h>
#include <asm/facility.h>
#include <asm/nospec-branch.h>
@@ -570,15 +570,8 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
if (nospec_uses_trampoline()) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
- if (test_facility(35)) {
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- } else {
- /* larl %r1,.+14 */
- EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
- /* ex 0,0(%r1) */
- EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
- }
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
/* j . */
EMIT4_PCREL(0xa7f40000, 0);
}
@@ -589,20 +582,12 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
(is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
- if (test_facility(35)) {
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
- /* br %r1 */
- _EMIT2(0x07f1);
- } else {
- /* ex 0,S390_lowcore.br_r1_tampoline */
- EMIT4_DISP(0x44000000, REG_0, REG_0,
- offsetof(struct lowcore, br_r1_trampoline));
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
- }
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ /* br %r1 */
+ _EMIT2(0x07f1);
}
}
@@ -622,19 +607,10 @@ static int get_probe_mem_regno(const u8 *insn)
return insn[1] >> 4;
}
-static bool ex_handler_bpf(const struct exception_table_entry *x,
- struct pt_regs *regs)
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
- int regno;
- u8 *insn;
-
regs->psw.addr = extable_fixup(x);
- insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16);
- regno = get_probe_mem_regno(insn);
- if (WARN_ON_ONCE(regno < 0))
- /* JIT bug - unexpected instruction. */
- return false;
- regs->gprs[regno] = 0;
+ regs->gprs[x->data] = 0;
return true;
}
@@ -642,16 +618,17 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
int probe_prg, int nop_prg)
{
struct exception_table_entry *ex;
+ int reg, prg;
s64 delta;
u8 *insn;
- int prg;
int i;
if (!fp->aux->extable)
/* Do nothing during early JIT passes. */
return 0;
insn = jit->prg_buf + probe_prg;
- if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0))
+ reg = get_probe_mem_regno(insn);
+ if (WARN_ON_ONCE(reg < 0))
/* JIT bug - unexpected probe instruction. */
return -1;
if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
@@ -678,7 +655,8 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
/* JIT bug - landing pad and extable must be close. */
return -1;
ex->fixup = delta;
- ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ ex->type = EX_TYPE_BPF;
+ ex->data = reg;
jit->excnt++;
}
return 0;
@@ -1369,7 +1347,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
jit->prg);
/*
- * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
+ * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
* goto out;
*/
@@ -1381,9 +1359,9 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */
+ /* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
patch_2_clij = jit->prg;
- EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT,
+ EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
2, jit->prg);
/*
@@ -1831,7 +1809,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/*
* Three initial passes:
* - 1/2: Determine clobbered registers
- * - 3: Calculate program size and addrs arrray
+ * - 3: Calculate program size and addrs array
*/
for (pass = 1; pass <= 3; pass++) {
if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index bf557a1b789c..5ae31ca9dd44 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o
+ pci_bus.o pci_kvm_hook.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 2f9b78fa82a5..73cdc5539384 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -61,6 +61,12 @@ DEFINE_STATIC_KEY_FALSE(have_mio);
static struct kmem_cache *zdev_fmb_cache;
+/* AEN structures that must be preserved over KVM module re-insertion */
+union zpci_sic_iib *zpci_aipb;
+EXPORT_SYMBOL_GPL(zpci_aipb);
+struct airq_iv *zpci_aif_sbv;
+EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -69,6 +75,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
list_for_each_entry(tmp, &zpci_list, entry) {
if (tmp->fid == fid) {
zdev = tmp;
+ zpci_zdev_get(zdev);
break;
}
}
@@ -119,11 +126,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
fib.pba = base;
fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc)
zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
return cc;
}
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
/* Modify PCI: Unregister I/O address translation parameters */
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
@@ -132,6 +141,8 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
struct zpci_fib fib = {0};
u8 cc, status;
+ fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc)
zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
@@ -159,6 +170,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
atomic64_set(&zdev->unmapped_pages, 0);
fib.fmb_addr = virt_to_phys(zdev->fmb);
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc) {
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
@@ -177,6 +189,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
if (!zdev->fmb)
return -EINVAL;
+ fib.gd = zdev->gisa;
+
/* Function measurement is disabled if fmb address is zero */
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3) /* Function already gone. */
@@ -399,7 +413,7 @@ EXPORT_SYMBOL(pci_iounmap);
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 *val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
+ struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
}
@@ -407,7 +421,7 @@ static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
+ struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
}
@@ -699,6 +713,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
zpci_update_fh(zdev, fh);
return rc;
}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
int zpci_disable_device(struct zpci_dev *zdev)
{
@@ -722,6 +737,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
}
return rc;
}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
/**
* zpci_hot_reset_device - perform a reset of the given zPCI function
@@ -771,7 +787,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64)zdev->dma_table);
+ virt_to_phys(zdev->dma_table));
else
rc = zpci_dma_init_device(zdev);
if (rc) {
@@ -798,7 +814,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
struct zpci_dev *zdev;
int rc;
- zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+ zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
return ERR_PTR(-ENOMEM);
@@ -815,6 +831,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
kref_init(&zdev->kref);
mutex_init(&zdev->lock);
+ mutex_init(&zdev->kzdev_lock);
rc = zpci_init_iommu(zdev);
if (rc)
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 5d77acbd1c87..6a8da1b742ae 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -145,9 +145,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
struct zpci_dev *zdev;
int devfn, rc, ret = 0;
- if (!zbus->function[0])
- return 0;
-
for (devfn = 0; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
zdev = zbus->function[devfn];
if (zdev && zdev->state == ZPCI_FN_STATE_CONFIGURED) {
@@ -184,26 +181,26 @@ void zpci_bus_scan_busses(void)
/* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
* @zbus: the zbus holding the zdevices
- * @f0: function 0 of the bus
+ * @fr: PCI root function that will determine the bus's domain, and bus speeed
* @ops: the pci operations
*
- * Function zero is taken as a parameter as this is used to determine the
- * domain, multifunction property and maximum bus speed of the entire bus.
+ * The PCI function @fr determines the domain (its UID), multifunction property
+ * and maximum bus speed of the entire bus.
*
* Return: 0 on success, an error code otherwise
*/
-static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *f0, struct pci_ops *ops)
+static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, struct pci_ops *ops)
{
struct pci_bus *bus;
int domain;
- domain = zpci_alloc_domain((u16)f0->uid);
+ domain = zpci_alloc_domain((u16)fr->uid);
if (domain < 0)
return domain;
zbus->domain_nr = domain;
- zbus->multifunction = f0->rid_available;
- zbus->max_bus_speed = f0->max_bus_speed;
+ zbus->multifunction = fr->rid_available;
+ zbus->max_bus_speed = fr->max_bus_speed;
/*
* Note that the zbus->resources are taken over and zbus->resources
@@ -303,47 +300,6 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
}
}
-/* zpci_bus_create_hotplug_slots - Add hotplug slot(s) for device added to bus
- * @zdev: the zPCI device that was newly added
- *
- * Add the hotplug slot(s) for the newly added PCI function. Normally this is
- * simply the slot for the function itself. If however we are adding the
- * function 0 on a zbus, it might be that we already registered functions on
- * that zbus but could not create their hotplug slots yet so add those now too.
- *
- * Return: 0 on success, an error code otherwise
- */
-static int zpci_bus_create_hotplug_slots(struct zpci_dev *zdev)
-{
- struct zpci_bus *zbus = zdev->zbus;
- int devfn, rc = 0;
-
- rc = zpci_init_slot(zdev);
- if (rc)
- return rc;
- zdev->has_hp_slot = 1;
-
- if (zdev->devfn == 0 && zbus->multifunction) {
- /* Now that function 0 is there we can finally create the
- * hotplug slots for those functions with devfn != 0 that have
- * been parked in zbus->function[] waiting for us to be able to
- * create the PCI bus.
- */
- for (devfn = 1; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
- zdev = zbus->function[devfn];
- if (zdev && !zdev->has_hp_slot) {
- rc = zpci_init_slot(zdev);
- if (rc)
- return rc;
- zdev->has_hp_slot = 1;
- }
- }
-
- }
-
- return rc;
-}
-
static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
int rc = -EINVAL;
@@ -352,21 +308,19 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
pr_err("devfn %04x is already assigned\n", zdev->devfn);
return rc;
}
+
zdev->zbus = zbus;
zbus->function[zdev->devfn] = zdev;
zpci_nb_devices++;
- if (zbus->bus) {
- if (zbus->multifunction && !zdev->rid_available) {
- WARN_ONCE(1, "rid_available not set for multifunction\n");
- goto error;
- }
-
- zpci_bus_create_hotplug_slots(zdev);
- } else {
- /* Hotplug slot will be created once function 0 appears */
- zbus->multifunction = 1;
+ if (zbus->multifunction && !zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ goto error;
}
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ goto error;
+ zdev->has_hp_slot = 1;
return 0;
@@ -400,7 +354,11 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
return -ENOMEM;
}
- if (zdev->devfn == 0) {
+ if (!zbus->bus) {
+ /* The UID of the first PCI function registered with a zpci_bus
+ * is used as the domain number for that bus. Currently there
+ * is exactly one zpci_bus per domain.
+ */
rc = zpci_bus_create_pci_bus(zbus, zdev, ops);
if (rc)
goto error;
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e359d2686178..e96c9860e064 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -19,7 +19,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
void zpci_release_device(struct kref *kref);
static inline void zpci_zdev_put(struct zpci_dev *zdev)
{
- kref_put(&zdev->kref, zpci_release_device);
+ if (zdev)
+ kref_put(&zdev->kref, zpci_release_device);
}
static inline void zpci_zdev_get(struct zpci_dev *zdev)
@@ -32,8 +33,8 @@ void zpci_free_domain(int domain);
int zpci_setup_bus_resources(struct zpci_dev *zdev,
struct list_head *resources);
-static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
- unsigned int devfn)
+static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
+ unsigned int devfn)
{
struct zpci_bus *zbus = bus->sysdata;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index be077b39da33..ee367798e388 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -17,17 +17,20 @@
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
+#include <asm/asm-extable.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
#include <asm/clp.h>
#include <uapi/asm/clp.h>
+#include "pci_bus.h"
+
bool zpci_unique_uid;
void update_uid_checking(bool new)
{
if (zpci_unique_uid != new)
- zpci_dbg(1, "uid checking:%d\n", new);
+ zpci_dbg(3, "uid checking:%d\n", new);
zpci_unique_uid = new;
}
@@ -103,6 +106,8 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
zdev->version = response->version;
+ zdev->maxstbl = response->maxstbl;
+ zdev->dtsm = response->dtsm;
switch (response->version) {
case 1:
@@ -226,12 +231,16 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
{
struct clp_req_rsp_set_pci *rrb;
int rc, retries = 100;
+ u32 gisa = 0;
*fh = 0;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
+ if (command != CLP_SET_DISABLE_PCI_FN)
+ gisa = zdev->gisa;
+
do {
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
@@ -240,6 +249,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
rrb->request.fh = zdev->fh;
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
+ rrb->request.gisa = gisa;
rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
@@ -403,8 +413,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
return;
zdev = get_zdev_by_fid(entry->fid);
- if (!zdev)
- zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ if (zdev) {
+ zpci_zdev_put(zdev);
+ return;
+ }
+ zpci_create_device(entry->fid, entry->fh, entry->config_state);
}
int clp_scan_pci_devices(void)
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 3408c0df3ebf..ca6bd98eec13 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -196,7 +196,7 @@ int __init zpci_debug_init(void)
if (!pci_debug_err_id)
return -EINVAL;
debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
- debug_set_level(pci_debug_err_id, 6);
+ debug_set_level(pci_debug_err_id, 3);
debugfs_root = debugfs_create_dir("pci", NULL);
return 0;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 1f4540d6bd2d..227cf0a62800 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -74,7 +74,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
if (!sto)
return NULL;
- set_rt_sto(entry, sto);
+ set_rt_sto(entry, virt_to_phys(sto));
validate_rt_entry(entry);
entry_clr_protected(entry);
}
@@ -91,7 +91,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *entry)
pto = dma_alloc_page_table();
if (!pto)
return NULL;
- set_st_pto(entry, pto);
+ set_st_pto(entry, virt_to_phys(pto));
validate_st_entry(entry);
entry_clr_protected(entry);
}
@@ -117,7 +117,7 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
return &pto[px];
}
-void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
+void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
{
if (flags & ZPCI_PTE_INVALID) {
invalidate_pt_entry(entry);
@@ -132,11 +132,11 @@ void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
entry_clr_protected(entry);
}
-static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
dma_addr_t dma_addr, size_t size, int flags)
{
unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+ phys_addr_t page_addr = (pa & PAGE_MASK);
unsigned long irq_flags;
unsigned long *entry;
int i, rc = 0;
@@ -217,7 +217,7 @@ out:
return ret;
}
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
dma_addr_t dma_addr, size_t size, int flags)
{
int rc;
@@ -400,7 +400,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
struct page *page;
- unsigned long pa;
+ phys_addr_t pa;
dma_addr_t map;
size = PAGE_ALIGN(size);
@@ -411,18 +411,18 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
pa = page_to_phys(page);
map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
if (dma_mapping_error(dev, map)) {
- free_pages(pa, get_order(size));
+ __free_pages(page, get_order(size));
return NULL;
}
atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
if (dma_handle)
*dma_handle = map;
- return (void *) pa;
+ return phys_to_virt(pa);
}
static void s390_dma_free(struct device *dev, size_t size,
- void *pa, dma_addr_t dma_handle,
+ void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
@@ -430,7 +430,7 @@ static void s390_dma_free(struct device *dev, size_t size,
size = PAGE_ALIGN(size);
atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
- free_pages((unsigned long) pa, get_order(size));
+ free_pages((unsigned long)vaddr, get_order(size));
}
/* Map a segment into a contiguous dma address area */
@@ -443,7 +443,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
dma_addr_t dma_addr_base, dma_addr;
int flags = ZPCI_PTE_VALID;
struct scatterlist *s;
- unsigned long pa = 0;
+ phys_addr_t pa = 0;
int ret;
dma_addr_base = dma_alloc_address(dev, nr_pages);
@@ -598,7 +598,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
}
if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64)zdev->dma_table)) {
+ virt_to_phys(zdev->dma_table))) {
rc = -EIO;
goto free_bitmap;
}
@@ -666,7 +666,7 @@ static int __init dma_alloc_cpu_table_caches(void)
int __init zpci_dma_init(void)
{
- s390_iommu_aperture = (u64)high_memory;
+ s390_iommu_aperture = (u64)virt_to_phys(high_memory);
if (!s390_iommu_aperture_factor)
s390_iommu_aperture = ULONG_MAX;
else
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 2e3e5b278925..b9324ca2eb94 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -269,7 +269,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
if (!pdev)
- return;
+ goto no_pdev;
switch (ccdf->pec) {
case 0x003a: /* Service Action or Error Recovery Successful */
@@ -286,6 +286,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
break;
}
pci_dev_put(pdev);
+no_pdev:
+ zpci_zdev_put(zdev);
}
void zpci_event_error(void *data)
@@ -314,13 +316,11 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ bool existing_zdev = !!zdev;
enum zpci_state state;
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
- zpci_err("avail CCDF:\n");
- zpci_err_hex(ccdf, sizeof(*ccdf));
-
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
@@ -378,6 +378,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default:
break;
}
+ if (existing_zdev)
+ zpci_zdev_put(zdev);
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 28d863aaafea..56480be48244 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/delay.h>
#include <linux/jump_label.h>
+#include <asm/asm-extable.h>
#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_debug.h>
@@ -17,16 +18,40 @@
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
-static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+struct zpci_err_insn_data {
+ u8 insn;
+ u8 cc;
+ u8 status;
+ union {
+ struct {
+ u64 req;
+ u64 offset;
+ };
+ struct {
+ u64 addr;
+ u64 len;
+ };
+ };
+} __packed;
+
+static inline void zpci_err_insn_req(int lvl, u8 insn, u8 cc, u8 status,
+ u64 req, u64 offset)
+{
+ struct zpci_err_insn_data data = {
+ .insn = insn, .cc = cc, .status = status,
+ .req = req, .offset = offset};
+
+ zpci_err_hex_level(lvl, &data, sizeof(data));
+}
+
+static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
+ u64 addr, u64 len)
{
- struct {
- u64 req;
- u64 offset;
- u8 cc;
- u8 status;
- } __packed data = {req, offset, cc, status};
-
- zpci_err_hex(&data, sizeof(data));
+ struct zpci_err_insn_data data = {
+ .insn = insn, .cc = cc, .status = status,
+ .addr = addr, .len = len};
+
+ zpci_err_hex_level(lvl, &data, sizeof(data));
}
/* Modify PCI Function Controls */
@@ -46,19 +71,28 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
{
+ bool retried = false;
u8 cc;
do {
cc = __mpcifc(req, fib, status);
- if (cc == 2)
+ if (cc == 2) {
msleep(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'M', cc, *status, req, 0);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, *status, req, 0);
+ zpci_err_insn_req(0, 'M', cc, *status, req, 0);
+ else if (retried)
+ zpci_err_insn_req(1, 'M', cc, *status, req, 0);
return cc;
}
+EXPORT_SYMBOL_GPL(zpci_mod_fc);
/* Refresh PCI Translations */
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
@@ -79,16 +113,24 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
{
+ bool retried = false;
u8 cc, status;
do {
cc = __rpcit(fn, addr, range, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_addr(1, 'R', cc, status, addr, range);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, addr, range);
+ zpci_err_insn_addr(0, 'R', cc, status, addr, range);
+ else if (retried)
+ zpci_err_insn_addr(1, 'R', cc, status, addr, range);
if (cc == 1 && (status == 4 || status == 16))
return -ENOMEM;
@@ -97,7 +139,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
{
if (!test_facility(72))
return -EIO;
@@ -108,6 +150,7 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
return 0;
}
+EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
/* PCI Load */
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
@@ -143,17 +186,25 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
int __zpci_load(u64 *data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcilg(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'l', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 'l', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 'l', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -197,7 +248,7 @@ int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
cc = __pcilg_mio(data, (__force u64) addr, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) addr);
+ zpci_err_insn_addr(0, 'L', cc, status, (__force u64) addr, len);
return (cc > 0) ? -EIO : cc;
}
@@ -224,17 +275,25 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
int __zpci_store(u64 data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcistg(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 's', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 's', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 's', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -277,7 +336,7 @@ int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
cc = __pcistg_mio(data, (__force u64) addr, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) addr);
+ zpci_err_insn_addr(0, 'S', cc, status, (__force u64) addr, len);
return (cc > 0) ? -EIO : cc;
}
@@ -303,17 +362,25 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
int __zpci_store_block(const u64 *data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcistb(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(0, 'b', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 'b', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 'b', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -357,7 +424,7 @@ int zpci_write_block(volatile void __iomem *dst,
cc = __pcistb_mio(src, (__force u64) dst, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) dst);
+ zpci_err_insn_addr(0, 'B', cc, status, (__force u64) dst, len);
return (cc > 0) ? -EIO : cc;
}
@@ -365,10 +432,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block);
static inline void __pciwb_mio(void)
{
- unsigned long unused = 0;
-
- asm volatile (".insn rre,0xb9d50000,%[op],%[op]\n"
- : [op] "+d" (unused));
+ asm volatile (".insn rre,0xb9d50000,0,0\n");
}
void zpci_barrier(void)
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 954bb7a83124..a2b42a63a53b 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -11,16 +11,10 @@
#include <asm/isc.h>
#include <asm/airq.h>
+#include <asm/tpi.h>
static enum {FLOATING, DIRECTED} irq_delivery;
-#define SIC_IRQ_MODE_ALL 0
-#define SIC_IRQ_MODE_SINGLE 1
-#define SIC_IRQ_MODE_DIRECT 4
-#define SIC_IRQ_MODE_D_ALL 16
-#define SIC_IRQ_MODE_D_SINGLE 17
-#define SIC_IRQ_MODE_SET_CPU 18
-
/*
* summary bit vector
* FLOATING - summary bit per function
@@ -45,10 +39,11 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.isc = PCI_ISC;
fib.fmt0.sum = 1; /* enable summary notifications */
fib.fmt0.noi = airq_iv_end(zdev->aibv);
- fib.fmt0.aibv = (unsigned long) zdev->aibv->vector;
+ fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
- fib.fmt0.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8;
+ fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
+ fib.gd = zdev->gisa;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -60,6 +55,8 @@ static int zpci_clear_airq(struct zpci_dev *zdev)
struct zpci_fib fib = {0};
u8 cc, status;
+ fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -78,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
fib.fmt = 1;
fib.fmt1.noi = zdev->msi_nr_irqs;
fib.fmt1.dibvo = zdev->msi_first_bit;
+ fib.gd = zdev->gisa;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -90,6 +88,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
u8 cc, status;
fib.fmt = 1;
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -99,7 +98,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
}
/* Register adapter interruptions */
-int zpci_set_irq(struct zpci_dev *zdev)
+static int zpci_set_irq(struct zpci_dev *zdev)
{
int rc;
@@ -115,7 +114,7 @@ int zpci_set_irq(struct zpci_dev *zdev)
}
/* Clear adapter interruptions */
-int zpci_clear_irq(struct zpci_dev *zdev)
+static int zpci_clear_irq(struct zpci_dev *zdev)
{
int rc;
@@ -153,6 +152,7 @@ static struct irq_chip zpci_irq_chip = {
static void zpci_handle_cpu_local_irq(bool rescan)
{
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+ union zpci_sic_iib iib = {{0}};
unsigned long bit;
int irqs_on = 0;
@@ -164,7 +164,7 @@ static void zpci_handle_cpu_local_irq(bool rescan)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
break;
bit = 0;
continue;
@@ -192,6 +192,7 @@ static void zpci_handle_remote_irq(void *data)
static void zpci_handle_fallback_irq(void)
{
struct cpu_irq_data *cpu_data;
+ union zpci_sic_iib iib = {{0}};
unsigned long cpu;
int irqs_on = 0;
@@ -202,7 +203,7 @@ static void zpci_handle_fallback_irq(void)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
cpu = 0;
continue;
@@ -216,8 +217,11 @@ static void zpci_handle_fallback_irq(void)
}
}
-static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_directed_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ bool floating = !tpi_info->directed_irq;
+
if (floating) {
inc_irq_stat(IRQIO_PCF);
zpci_handle_fallback_irq();
@@ -227,8 +231,10 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
}
}
-static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_floating_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ union zpci_sic_iib iib = {{0}};
unsigned long si, ai;
struct airq_iv *aibv;
int irqs_on = 0;
@@ -242,7 +248,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
si = 0;
continue;
@@ -291,7 +297,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
zdev->aisb = bit;
/* Create adapter interrupt vector */
- zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
if (!zdev->aibv)
return -ENOMEM;
@@ -303,7 +309,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
/* Request MSI interrupts */
hwirq = bit;
- for_each_pci_msi_entry(msi, pdev) {
+ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
@@ -362,9 +368,7 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
return;
/* Release MSI interrupts */
- for_each_pci_msi_entry(msi, pdev) {
- if (!msi->irq)
- continue;
+ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
irq_set_msi_desc(msi->irq, NULL);
irq_free_desc(msi->irq);
msi->msg.address_lo = 0;
@@ -387,13 +391,13 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
}
-void arch_restore_msi_irqs(struct pci_dev *pdev)
+bool arch_restore_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
if (!zdev->irqs_registered)
zpci_set_irq(zdev);
- default_restore_msi_irqs(pdev);
+ return true;
}
static struct airq_struct zpci_airq = {
@@ -404,11 +408,12 @@ static struct airq_struct zpci_airq = {
static void __init cpu_enable_directed_irq(void *unused)
{
union zpci_sic_iib iib = {{0}};
+ union zpci_sic_iib ziib = {{0}};
iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
- __zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
- zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
}
static int __init zpci_directed_irq_init(void)
@@ -416,14 +421,14 @@ static int __init zpci_directed_irq_init(void)
union zpci_sic_iib iib = {{0}};
unsigned int cpu;
- zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+ zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
if (!zpci_sbv)
return -ENOMEM;
iib.diib.isc = PCI_ISC;
iib.diib.nr_cpus = num_possible_cpus();
- iib.diib.disb_addr = (u64) zpci_sbv->vector;
- __zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+ iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
GFP_KERNEL);
@@ -438,7 +443,7 @@ static int __init zpci_directed_irq_init(void)
zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
AIRQ_IV_DATA |
AIRQ_IV_CACHELINE |
- (!cpu ? AIRQ_IV_ALLOC : 0));
+ (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
if (!zpci_ibv[cpu])
return -ENOMEM;
}
@@ -455,7 +460,7 @@ static int __init zpci_floating_irq_init(void)
if (!zpci_ibv)
return -ENOMEM;
- zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
if (!zpci_sbv)
goto out_free;
@@ -468,6 +473,7 @@ out_free:
int __init zpci_irq_init(void)
{
+ union zpci_sic_iib iib = {{0}};
int rc;
irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
@@ -499,7 +505,7 @@ int __init zpci_irq_init(void)
* Enable floating IRQs (with suppression after one IRQ). When using
* directed IRQs this enables the fallback path.
*/
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);
return 0;
out_airq:
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
new file mode 100644
index 000000000000..ff34baf50a3e
--- /dev/null
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022. All rights reserved.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index c5b35ea129cf..588089332931 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/pci.h>
+#include <asm/asm-extable.h>
#include <asm/pci_io.h>
#include <asm/pci_debug.h>
@@ -63,7 +64,7 @@ static inline int __pcistg_mio_inuser(
asm volatile (
" sacf 256\n"
"0: llgc %[tmp],0(%[src])\n"
- " sllg %[val],%[val],8\n"
+ "4: sllg %[val],%[val],8\n"
" aghi %[src],1\n"
" ogr %[val],%[tmp]\n"
" brctg %[cnt],0b\n"
@@ -71,7 +72,7 @@ static inline int __pcistg_mio_inuser(
"2: ipm %[cc]\n"
" srl %[cc],28\n"
"3: sacf 768\n"
- EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
:
[src] "+a" (src), [cnt] "+d" (cnt),
[val] "+d" (val), [tmp] "=d" (tmp),
@@ -214,10 +215,10 @@ static inline int __pcilg_mio_inuser(
"2: ahi %[shift],-8\n"
" srlg %[tmp],%[val],0(%[shift])\n"
"3: stc %[tmp],0(%[dst])\n"
- " aghi %[dst],1\n"
+ "5: aghi %[dst],1\n"
" brctg %[cnt],2b\n"
"4: sacf 768\n"
- EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
+ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
:
[ioaddr_len] "+&d" (ioaddr_len.pair),
[cc] "+d" (cc), [val] "=d" (val),
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 360ada80d20c..d237bc6841cb 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -48,7 +48,6 @@ OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
$(call if_changed,objcopy)
-$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
- $(call if_changed_rule,as_o_S)
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro
-obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
+obj-y += kexec-purgatory.o
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 3d1c31e0cf3d..6f835124ee82 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -44,11 +44,14 @@
.endm
.macro MEMSWAP dst,src,buf,len
-10: cghi \len,bufsz
+10: larl %r0,purgatory_end
+ larl %r1,stack
+ slgr %r0,%r1
+ cgr \len,%r0
jh 11f
lgr %r4,\len
j 12f
-11: lghi %r4,bufsz
+11: lgr %r4,%r0
12: MEMCPY \buf,\dst,%r4
MEMCPY \dst,\src,%r4
@@ -135,12 +138,18 @@ ENTRY(purgatory_start)
.start_crash_kernel:
/* Location of purgatory_start in crash memory */
+ larl %r0,.base_crash
+ larl %r1,purgatory_start
+ slgr %r0,%r1
lgr %r8,%r13
- aghi %r8,-(.base_crash-purgatory_start)
+ sgr %r8,%r0
/* Destination for this code i.e. end of memory to be swapped. */
+ larl %r0,purgatory_end
+ larl %r1,purgatory_start
+ slgr %r0,%r1
lg %r9,crash_size-.base_crash(%r13)
- aghi %r9,-(purgatory_end-purgatory_start)
+ sgr %r9,%r0
/* Destination in crash memory, i.e. same as r9 but in crash memory. */
lg %r10,crash_start-.base_crash(%r13)
@@ -149,15 +158,19 @@ ENTRY(purgatory_start)
/* Buffer location (in crash memory) and size. As the purgatory is
* behind the point of no return it can re-use the stack as buffer.
*/
- lghi %r11,bufsz
+ larl %r11,purgatory_end
larl %r12,stack
+ slgr %r11,%r12
MEMCPY %r12,%r9,%r11 /* dst -> (crash) buf */
MEMCPY %r9,%r8,%r11 /* self -> dst */
/* Jump to new location. */
lgr %r7,%r9
- aghi %r7,.jump_to_dst-purgatory_start
+ larl %r0,.jump_to_dst
+ larl %r1,purgatory_start
+ slgr %r0,%r1
+ agr %r7,%r0
br %r7
.jump_to_dst:
@@ -169,7 +182,10 @@ ENTRY(purgatory_start)
/* Load new buffer location after jump */
larl %r7,stack
- aghi %r10,stack-purgatory_start
+ lgr %r0,%r7
+ larl %r1,purgatory_start
+ slgr %r0,%r1
+ agr %r10,%r0
MEMCPY %r10,%r7,%r11 /* (new) buf -> (crash) buf */
/* Now the code is set up to run from its designated location. Start
diff --git a/arch/s390/tools/gcc-thunk-extern.sh b/arch/s390/tools/gcc-thunk-extern.sh
new file mode 100755
index 000000000000..20bcbf6dd7ab
--- /dev/null
+++ b/arch/s390/tools/gcc-thunk-extern.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Borrowed from gcc: gcc/testsuite/gcc.target/s390/nobp-section-type-conflict.c
+# Checks that we don't get error: section type conflict with ‘put_page’.
+
+cat << "END" | $@ -x c - -fno-PIE -march=z10 -mindirect-branch=thunk-extern -mfunction-return=thunk-extern -mindirect-branch-table -O2 -c -o /dev/null
+int a;
+int b (void);
+void c (int);
+
+static void
+put_page (void)
+{
+ if (b ())
+ c (a);
+}
+
+__attribute__ ((__section__ (".init.text"), __cold__)) void
+d (void)
+{
+ put_page ();
+ put_page ();
+}
+END
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 606324e56e4e..cb0aff5c0187 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -27,24 +27,16 @@ static struct facility_def facility_defs[] = {
*/
.name = "FACILITIES_ALS",
.bits = (int[]){
-#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES
0, /* N3 instructions */
1, /* z/Arch mode installed */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
18, /* long displacement facility */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
21, /* extended-immediate facility */
25, /* store clock fast */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
27, /* mvcos */
32, /* compare and swap and store */
33, /* compare and swap and store 2 */
34, /* general instructions extension */
35, /* execute extensions */
-#endif
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
45, /* fast-BCR, etc. */
#endif
@@ -119,6 +111,7 @@ static struct facility_def facility_defs[] = {
193, /* bear enhancement facility */
194, /* rdp enhancement facility */
196, /* processor activity instrumentation facility */
+ 197, /* processor activity instrumentation extension 1 */
-1 /* END */
}
},
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 6db9820d104a..5f008e794898 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -276,6 +276,7 @@ b285 lpctl S_RD
b286 qsi S_RD
b287 lsctl S_RD
b28e qctri S_RD
+b28f qpaci S_RD
b299 srnm S_RD
b29c stfpc S_RD
b29d lfpc S_RD
@@ -1098,7 +1099,7 @@ eb61 stric RSY_RDRU
eb62 mric RSY_RDRU
eb6a asi SIY_IRD
eb6e alsi SIY_IRD
-eb71 lpswey SIY_URD
+eb71 lpswey SIY_RD
eb7a agsi SIY_IRD
eb7e algsi SIY_IRD
eb80 icmh RSY_RURD