aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig33
-rw-r--r--arch/s390/Kconfig.debug5
-rw-r--r--arch/s390/Makefile58
-rw-r--r--arch/s390/appldata/appldata_base.c122
-rw-r--r--arch/s390/boot/Makefile45
-rw-r--r--arch/s390/boot/als.c (renamed from arch/s390/kernel/als.c)20
-rw-r--r--arch/s390/boot/compressed/.gitignore1
-rw-r--r--arch/s390/boot/compressed/Makefile55
-rw-r--r--arch/s390/boot/compressed/head.S8
-rw-r--r--arch/s390/boot/compressed/misc.c37
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S15
-rw-r--r--arch/s390/boot/compressed/vmlinux.scr.lds.S (renamed from arch/s390/boot/compressed/vmlinux.scr)4
-rw-r--r--arch/s390/boot/ebcdic.c2
-rw-r--r--arch/s390/boot/head.S (renamed from arch/s390/kernel/head.S)11
-rw-r--r--arch/s390/boot/head_kdump.S (renamed from arch/s390/kernel/head_kdump.S)0
-rw-r--r--arch/s390/boot/mem.S2
-rw-r--r--arch/s390/boot/sclp_early_core.c2
-rw-r--r--arch/s390/crypto/aes_s390.c1
-rw-r--r--arch/s390/crypto/ghash_s390.c1
-rw-r--r--arch/s390/crypto/sha1_s390.c1
-rw-r--r--arch/s390/crypto/sha256_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c2
-rw-r--r--arch/s390/hypfs/hypfs_diag.c4
-rw-r--r--arch/s390/hypfs/inode.c6
-rw-r--r--arch/s390/include/asm/ap.h284
-rw-r--r--arch/s390/include/asm/atomic.h65
-rw-r--r--arch/s390/include/asm/cpu_mf.h12
-rw-r--r--arch/s390/include/asm/css_chars.h62
-rw-r--r--arch/s390/include/asm/ftrace.h6
-rw-r--r--arch/s390/include/asm/gmap.h10
-rw-r--r--arch/s390/include/asm/hugetlb.h5
-rw-r--r--arch/s390/include/asm/kprobes.h2
-rw-r--r--arch/s390/include/asm/kvm_host.h11
-rw-r--r--arch/s390/include/asm/lowcore.h2
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/nospec-insn.h4
-rw-r--r--arch/s390/include/asm/pci.h5
-rw-r--r--arch/s390/include/asm/pgtable.h13
-rw-r--r--arch/s390/include/asm/purgatory.h6
-rw-r--r--arch/s390/include/asm/qdio.h1
-rw-r--r--arch/s390/include/asm/sections.h2
-rw-r--r--arch/s390/include/asm/setup.h4
-rw-r--r--arch/s390/include/uapi/asm/chsc.h10
-rw-r--r--arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--arch/s390/include/uapi/asm/socket.h3
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h72
-rw-r--r--arch/s390/kernel/Makefile24
-rw-r--r--arch/s390/kernel/asm-offsets.c8
-rw-r--r--arch/s390/kernel/compat_wrapper.c1
-rw-r--r--arch/s390/kernel/crash_dump.c134
-rw-r--r--arch/s390/kernel/early.c12
-rw-r--r--arch/s390/kernel/ebcdic.c36
-rw-r--r--arch/s390/kernel/entry.S8
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/ftrace.c2
-rw-r--r--arch/s390/kernel/head64.S43
-rw-r--r--arch/s390/kernel/kprobes.c86
-rw-r--r--arch/s390/kernel/mcount.S2
-rw-r--r--arch/s390/kernel/nospec-branch.c12
-rw-r--r--arch/s390/kernel/nospec-sysfs.c2
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c16
-rw-r--r--arch/s390/kernel/perf_regs.c6
-rw-r--r--arch/s390/kernel/setup.c4
-rw-r--r--arch/s390/kernel/signal.c3
-rw-r--r--arch/s390/kernel/syscalls/Makefile6
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kernel/sysinfo.c4
-rw-r--r--arch/s390/kernel/time.c15
-rw-r--r--arch/s390/kernel/topology.c44
-rw-r--r--arch/s390/kernel/vdso.c2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S13
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c383
-rw-r--r--arch/s390/kvm/priv.c143
-rw-r--r--arch/s390/kvm/vsie.c11
-rw-r--r--arch/s390/lib/mem.S16
-rw-r--r--arch/s390/mm/cmm.c74
-rw-r--r--arch/s390/mm/extmem.c4
-rw-r--r--arch/s390/mm/fault.c15
-rw-r--r--arch/s390/mm/gmap.c454
-rw-r--r--arch/s390/mm/hugetlbpage.c24
-rw-r--r--arch/s390/mm/page-states.c2
-rw-r--r--arch/s390/mm/pageattr.c6
-rw-r--r--arch/s390/mm/pgalloc.c6
-rw-r--r--arch/s390/mm/pgtable.c159
-rw-r--r--arch/s390/net/bpf_jit_comp.c3
-rw-r--r--arch/s390/numa/numa.c16
-rw-r--r--arch/s390/pci/pci.c3
-rw-r--r--arch/s390/pci/pci_debug.c9
-rw-r--r--arch/s390/purgatory/Makefile12
-rw-r--r--arch/s390/purgatory/head.S45
-rw-r--r--arch/s390/purgatory/purgatory.c9
-rw-r--r--arch/s390/scripts/Makefile.chkbss11
-rw-r--r--arch/s390/tools/gen_facilities.c3
-rw-r--r--arch/s390/tools/gen_opcode_table.c4
96 files changed, 1873 insertions, 1051 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index baed39772c84..9a9c7a6fe925 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -106,7 +106,6 @@ config S390
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
- select ARCH_WANTS_UBSAN_NO_NULL
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS2
@@ -136,6 +135,7 @@ config S390
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
@@ -146,6 +146,7 @@ config S390
select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_UNCOMPRESSED
select HAVE_KERNEL_XZ
select HAVE_KPROBES
select HAVE_KRETPROBES
@@ -157,9 +158,11 @@ config S390
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_NOP_MCOUNT
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select MODULES_USE_ELF_RELA
@@ -183,10 +186,6 @@ config PGTABLE_LEVELS
int
default 5
-source "init/Kconfig"
-
-source "kernel/Kconfig.freezer"
-
source "kernel/livepatch/Kconfig"
menu "Processor type and features"
@@ -514,8 +513,6 @@ config SCHED_TOPOLOGY
making when dealing with machines that have multi-threading,
multiple cores or multiple books.
-source kernel/Kconfig.preempt
-
source kernel/Kconfig.hz
config KEXEC
@@ -626,8 +623,6 @@ config FORCE_MAX_ZONEORDER
int
default "9"
-source "mm/Kconfig"
-
config MAX_PHYSMEM_BITS
int "Maximum size of supported physical memory in bits (42-53)"
range 42 53
@@ -797,10 +792,6 @@ config CRASH_DUMP
endmenu
-menu "Executable file formats / Emulations"
-
-source "fs/Kconfig.binfmt"
-
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -818,8 +809,6 @@ config SECCOMP
If unsure, say Y.
-endmenu
-
menu "Power Management"
config ARCH_HIBERNATION_POSSIBLE
@@ -829,30 +818,16 @@ source "kernel/power/Kconfig"
endmenu
-source "net/Kconfig"
-
config PCMCIA
def_bool n
config CCW
def_bool y
-source "drivers/Kconfig"
-
config HAVE_PNETID
tristate
default (SMC || CCWGROUP)
-source "fs/Kconfig"
-
-source "arch/s390/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
menu "Virtualization"
config PFAULT
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 2cfdfbf8d320..190527560b2c 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -1,11 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-menu "Kernel hacking"
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-source "lib/Kconfig.debug"
-
config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
@@ -20,5 +17,3 @@ config S390_PTDUMP
config EARLY_PRINTK
def_bool y
-
-endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 68a690442be0..ba6d122526fb 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,8 +14,18 @@ LD_BFD := elf64-s390
LDFLAGS := -m elf64_s390
KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
-KBUILD_CFLAGS += -m64
KBUILD_AFLAGS += -m64
+KBUILD_CFLAGS += -m64
+aflags_dwarf := -Wa,-gdwarf-2
+KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__
+KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
+KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2
+KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
UTS_MACHINE := s390x
STACK_SIZE := 16384
CHECKFLAGS += -D__s390__ -D__s390x__
@@ -52,18 +62,14 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
#
cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
-# old style option for packed stacks
-ifeq ($(call cc-option-yn,-mkernel-backchain),y)
-cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK
-aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
-endif
-
-# new style option for packed stacks
ifeq ($(call cc-option-yn,-mpacked-stack),y)
cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK
aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
endif
+KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y)
+KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y)
+
ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y)
cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
ifneq ($(call cc-option-yn,-mstack-size=8192),y)
@@ -71,8 +77,11 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
endif
endif
-ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
-cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack
+ifdef CONFIG_WARN_DYNAMIC_STACK
+ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
+ KBUILD_CFLAGS += -mwarn-dynamicstack
+ KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack
+ endif
endif
ifdef CONFIG_EXPOLINE
@@ -82,17 +91,20 @@ ifdef CONFIG_EXPOLINE
CC_FLAGS_EXPOLINE += -mindirect-branch-table
export CC_FLAGS_EXPOLINE
cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
+ aflags-y += -DCC_USING_EXPOLINE
endif
endif
ifdef CONFIG_FUNCTION_TRACER
-# make use of hotpatch feature if the compiler supports it
-cc_hotpatch := -mhotpatch=0,3
-ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
-CC_FLAGS_FTRACE := $(cc_hotpatch)
-KBUILD_AFLAGS += -DCC_USING_HOTPATCH
-KBUILD_CFLAGS += -DCC_USING_HOTPATCH
-endif
+ ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n)
+ # make use of hotpatch feature if the compiler supports it
+ cc_hotpatch := -mhotpatch=0,3
+ ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
+ CC_FLAGS_FTRACE := $(cc_hotpatch)
+ KBUILD_AFLAGS += -DCC_USING_HOTPATCH
+ KBUILD_CFLAGS += -DCC_USING_HOTPATCH
+ endif
+ endif
endif
# Test CFI features of binutils
@@ -102,11 +114,12 @@ KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y)
KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi)
KBUILD_AFLAGS += $(aflags-y) $(cfi)
+export KBUILD_AFLAGS_DECOMPRESSOR
+export KBUILD_CFLAGS_DECOMPRESSOR
OBJCOPYFLAGS := -O binary
-head-y := arch/s390/kernel/head.o
-head-y += arch/s390/kernel/head64.o
+head-y := arch/s390/kernel/head64.o
# See arch/s390/Kbuild for content of core part of the kernel
core-y += arch/s390/
@@ -121,7 +134,7 @@ boot := arch/s390/boot
syscalls := arch/s390/kernel/syscalls
tools := arch/s390/tools
-all: image bzImage
+all: bzImage
#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
KBUILD_IMAGE := $(boot)/bzImage
@@ -129,7 +142,7 @@ KBUILD_IMAGE := $(boot)/bzImage
install: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-image bzImage: vmlinux
+bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
zfcpdump:
@@ -152,8 +165,7 @@ archprepare:
# Don't use tabs in echo arguments
define archhelp
- echo '* image - Kernel image for IPL ($(boot)/image)'
- echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)'
+ echo '* bzImage - Kernel image for IPL ($(boot)/bzImage)'
echo ' install - Install kernel using'
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index ee6a9c387c87..9bf8489df6e6 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -206,35 +206,28 @@ static int
appldata_timer_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- unsigned int len;
- char buf[2];
+ int timer_active = appldata_timer_active;
+ int zero = 0;
+ int one = 1;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &timer_active,
+ .maxlen = sizeof(int),
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
+
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
- if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
- if (!write) {
- strncpy(buf, appldata_timer_active ? "1\n" : "0\n",
- ARRAY_SIZE(buf));
- len = strnlen(buf, ARRAY_SIZE(buf));
- if (len > *lenp)
- len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
- goto out;
- }
- len = *lenp;
- if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
- return -EFAULT;
spin_lock(&appldata_timer_lock);
- if (buf[0] == '1')
+ if (timer_active)
__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
- else if (buf[0] == '0')
+ else
__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
spin_unlock(&appldata_timer_lock);
-out:
- *lenp = len;
- *ppos += len;
return 0;
}
@@ -248,37 +241,24 @@ static int
appldata_interval_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- unsigned int len;
- int interval;
- char buf[16];
+ int interval = appldata_interval;
+ int one = 1;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &interval,
+ .maxlen = sizeof(int),
+ .extra1 = &one,
+ };
- if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
- if (!write) {
- len = sprintf(buf, "%i\n", appldata_interval);
- if (len > *lenp)
- len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
- goto out;
- }
- len = *lenp;
- if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
- return -EFAULT;
- interval = 0;
- sscanf(buf, "%i", &interval);
- if (interval <= 0)
- return -EINVAL;
+ rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
spin_lock(&appldata_timer_lock);
appldata_interval = interval;
__appldata_vtimer_setup(APPLDATA_MOD_TIMER);
spin_unlock(&appldata_timer_lock);
-out:
- *lenp = len;
- *ppos += len;
return 0;
}
@@ -293,10 +273,17 @@ appldata_generic_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
- unsigned int len;
- int rc, found;
- char buf[2];
struct list_head *lh;
+ int rc, found;
+ int active;
+ int zero = 0;
+ int one = 1;
+ struct ctl_table ctl_entry = {
+ .data = &active,
+ .maxlen = sizeof(int),
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
found = 0;
mutex_lock(&appldata_ops_mutex);
@@ -317,31 +304,15 @@ appldata_generic_handler(struct ctl_table *ctl, int write,
}
mutex_unlock(&appldata_ops_mutex);
- if (!*lenp || *ppos) {
- *lenp = 0;
+ active = ops->active;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write) {
module_put(ops->owner);
- return 0;
- }
- if (!write) {
- strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf));
- len = strnlen(buf, ARRAY_SIZE(buf));
- if (len > *lenp)
- len = *lenp;
- if (copy_to_user(buffer, buf, len)) {
- module_put(ops->owner);
- return -EFAULT;
- }
- goto out;
- }
- len = *lenp;
- if (copy_from_user(buf, buffer,
- len > sizeof(buf) ? sizeof(buf) : len)) {
- module_put(ops->owner);
- return -EFAULT;
+ return rc;
}
mutex_lock(&appldata_ops_mutex);
- if ((buf[0] == '1') && (ops->active == 0)) {
+ if (active && (ops->active == 0)) {
// protect work queue callback
if (!try_module_get(ops->owner)) {
mutex_unlock(&appldata_ops_mutex);
@@ -359,7 +330,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write,
module_put(ops->owner);
} else
ops->active = 1;
- } else if ((buf[0] == '0') && (ops->active == 1)) {
+ } else if (!active && (ops->active == 1)) {
ops->active = 0;
rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
(unsigned long) ops->data, ops->size,
@@ -370,9 +341,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write,
module_put(ops->owner);
}
mutex_unlock(&appldata_ops_mutex);
-out:
- *lenp = len;
- *ppos += len;
module_put(ops->owner);
return 0;
}
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index d1fa37fcce83..9e6668ee93de 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -3,19 +3,52 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-targets := image
-targets += bzImage
-subdir- := compressed
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
-$(obj)/image: vmlinux FORCE
- $(call if_changed,objcopy)
+KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
+KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+
+#
+# Use -march=z900 for als.c to be able to print an error
+# message if the kernel is started on a machine which is too old
+#
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
+AFLAGS_head.o += -march=z900
+AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH)
+AFLAGS_mem.o += -march=z900
+CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
+CFLAGS_als.o += -march=z900
+CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp_early_core.o += -march=z900
+endif
+
+CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
+
+obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o
+targets := bzImage startup.a $(obj-y)
+subdir- := compressed
+
+OBJECTS := $(addprefix $(obj)/,$(obj-y))
$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy)
-$(obj)/compressed/vmlinux: FORCE
+$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
+quiet_cmd_ar = AR $@
+ cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter $(OBJECTS), $^)
+
+$(obj)/startup.a: $(OBJECTS) FORCE
+ $(call if_changed,ar)
+
install: $(CONFIGURE) $(obj)/bzImage
sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
+
+chkbss := $(OBJECTS)
+chkbss-target := $(obj)/startup.a
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/als.c b/arch/s390/boot/als.c
index d1892bf36cab..d592e0d90d9f 100644
--- a/arch/s390/kernel/als.c
+++ b/arch/s390/boot/als.c
@@ -3,12 +3,10 @@
* Copyright IBM Corp. 2016
*/
#include <linux/kernel.h>
-#include <linux/init.h>
#include <asm/processor.h>
#include <asm/facility.h>
#include <asm/lowcore.h>
#include <asm/sclp.h>
-#include "entry.h"
/*
* The code within this file will be called very early. It may _not_
@@ -18,9 +16,9 @@
* For temporary objects the stack (16k) should be used.
*/
-static unsigned long als[] __initdata = { FACILITIES_ALS };
+static unsigned long als[] = { FACILITIES_ALS };
-static void __init u16_to_hex(char *str, u16 val)
+static void u16_to_hex(char *str, u16 val)
{
int i, num;
@@ -33,9 +31,9 @@ static void __init u16_to_hex(char *str, u16 val)
*str = '\0';
}
-static void __init print_machine_type(void)
+static void print_machine_type(void)
{
- static char mach_str[80] __initdata = "Detected machine-type number: ";
+ static char mach_str[80] = "Detected machine-type number: ";
char type_str[5];
struct cpuid id;
@@ -46,7 +44,7 @@ static void __init print_machine_type(void)
sclp_early_printk(mach_str);
}
-static void __init u16_to_decimal(char *str, u16 val)
+static void u16_to_decimal(char *str, u16 val)
{
int div = 1;
@@ -60,9 +58,9 @@ static void __init u16_to_decimal(char *str, u16 val)
*str = '\0';
}
-static void __init print_missing_facilities(void)
+static void print_missing_facilities(void)
{
- static char als_str[80] __initdata = "Missing facilities: ";
+ static char als_str[80] = "Missing facilities: ";
unsigned long val;
char val_str[6];
int i, j, first;
@@ -95,7 +93,7 @@ static void __init print_missing_facilities(void)
sclp_early_printk("See Principles of Operations for facility bits\n");
}
-static void __init facility_mismatch(void)
+static void facility_mismatch(void)
{
sclp_early_printk("The Linux kernel requires more recent processor hardware\n");
print_machine_type();
@@ -103,7 +101,7 @@ static void __init facility_mismatch(void)
disabled_wait(0x8badcccc);
}
-void __init verify_facilities(void)
+void verify_facilities(void)
{
int i;
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
index 2088cc140629..45aeb4f08752 100644
--- a/arch/s390/boot/compressed/.gitignore
+++ b/arch/s390/boot/compressed/.gitignore
@@ -1,4 +1,5 @@
sizes.h
vmlinux
vmlinux.lds
+vmlinux.scr.lds
vmlinux.bin.full
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 5766f7b9b271..04609478d18b 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -6,39 +6,29 @@
#
KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o
targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
-targets += misc.o piggy.o sizes.h head.o
-
-KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2
-KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
-KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float
-KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
-KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
+targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h)
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
+KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
+KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
-OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o ebcdic.o als.o)
-OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o
-OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
+OBJECTS := $(addprefix $(obj)/,$(obj-y))
LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS)
$(call if_changed,ld)
-TRIM_HEAD_SIZE := 0x11000
-
-sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p'
+# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p'
quiet_cmd_sizes = GEN $@
cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
-quiet_cmd_trim_head = TRIM $@
- cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@
-
$(obj)/sizes.h: vmlinux
$(call if_changed,sizes)
@@ -48,21 +38,18 @@ $(obj)/head.o: $(obj)/sizes.h
CFLAGS_misc.o += -I$(objtree)/$(obj)
$(obj)/misc.o: $(obj)/sizes.h
-OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S
-$(obj)/vmlinux.bin.full: vmlinux
+OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
+$(obj)/vmlinux.bin: vmlinux
$(call if_changed,objcopy)
-$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full
- $(call if_changed,trim_head)
-
vmlinux.bin.all-y := $(obj)/vmlinux.bin
-suffix-$(CONFIG_KERNEL_GZIP) := gz
-suffix-$(CONFIG_KERNEL_BZIP2) := bz2
-suffix-$(CONFIG_KERNEL_LZ4) := lz4
-suffix-$(CONFIG_KERNEL_LZMA) := lzma
-suffix-$(CONFIG_KERNEL_LZO) := lzo
-suffix-$(CONFIG_KERNEL_XZ) := xz
+suffix-$(CONFIG_KERNEL_GZIP) := .gz
+suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
+suffix-$(CONFIG_KERNEL_LZ4) := .lz4
+suffix-$(CONFIG_KERNEL_LZMA) := .lzma
+suffix-$(CONFIG_KERNEL_LZO) := .lzo
+suffix-$(CONFIG_KERNEL_XZ) := .xz
$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
$(call if_changed,gzip)
@@ -78,5 +65,9 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
$(call if_changed,xzkern)
LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y)
$(call if_changed,ld)
+
+chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS))
+chkbss-target := $(obj)/vmlinux.bin
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
index 9f94eca0f467..df8dbbc17bcc 100644
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -15,7 +15,7 @@
#include "sizes.h"
__HEAD
-ENTRY(startup_continue)
+ENTRY(startup_decompressor)
basr %r13,0 # get base
.LPG1:
# setup stack
@@ -23,7 +23,7 @@ ENTRY(startup_continue)
aghi %r15,-160
brasl %r14,decompress_kernel
# Set up registers for memory mover. We move the decompressed image to
- # 0x11000, where startup_continue of the decompressed image is supposed
+ # 0x100000, where startup_continue of the decompressed image is supposed
# to be.
lgr %r4,%r2
lg %r2,.Loffset-.LPG1(%r13)
@@ -33,7 +33,7 @@ ENTRY(startup_continue)
la %r1,0x200
mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13)
# When the memory mover is done we pass control to
- # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in
+ # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in
# the decompressed image.
lgr %r6,%r2
br %r1
@@ -47,6 +47,6 @@ mover_end:
.Lstack:
.quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
.Loffset:
- .quad 0x11000
+ .quad 0x100000
.Lmvsize:
.quad SZ__bss_start
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 511b2cc9b91a..f66ad73c205b 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -71,43 +71,6 @@ static int puts(const char *s)
return 0;
}
-void *memset(void *s, int c, size_t n)
-{
- char *xs;
-
- xs = s;
- while (n--)
- *xs++ = c;
- return s;
-}
-
-void *memcpy(void *dest, const void *src, size_t n)
-{
- const char *s = src;
- char *d = dest;
-
- while (n--)
- *d++ = *s++;
- return dest;
-}
-
-void *memmove(void *dest, const void *src, size_t n)
-{
- const char *s = src;
- char *d = dest;
-
- if (d <= s) {
- while (n--)
- *d++ = *s++;
- } else {
- d += n;
- s += n;
- while (n--)
- *--d = *--s;
- }
- return dest;
-}
-
static void error(char *x)
{
unsigned long long psw = 0x000a0000deadbeefULL;
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index d43c2db12d30..b16ac8b3c439 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -23,13 +23,10 @@ SECTIONS
*(.text.*)
_etext = . ;
}
- .rodata.compressed : {
- *(.rodata.compressed)
- }
.rodata : {
_rodata = . ;
*(.rodata) /* read-only data */
- *(.rodata.*)
+ *(EXCLUDE_FILE (*piggy.o) .rodata.compressed)
_erodata = . ;
}
.data : {
@@ -38,6 +35,15 @@ SECTIONS
*(.data.*)
_edata = . ;
}
+ startup_continue = 0x100000;
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
+ . = 0x100000;
+#else
+ . = ALIGN(8);
+#endif
+ .rodata.compressed : {
+ *(.rodata.compressed)
+ }
. = ALIGN(256);
.bss : {
_bss = . ;
@@ -54,5 +60,6 @@ SECTIONS
*(.eh_frame)
*(__ex_table)
*(*__ksymtab*)
+ *(___kcrctab*)
}
}
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr.lds.S
index 42a242597f34..ff01d18c9222 100644
--- a/arch/s390/boot/compressed/vmlinux.scr
+++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S
@@ -2,10 +2,14 @@
SECTIONS
{
.rodata.compressed : {
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
input_len = .;
LONG(input_data_end - input_data) input_data = .;
+#endif
*(.data)
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
output_len = . - 4;
input_data_end = .;
+#endif
}
}
diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c
new file mode 100644
index 000000000000..7391e7d36086
--- /dev/null
+++ b/arch/s390/boot/ebcdic.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/ebcdic.c"
diff --git a/arch/s390/kernel/head.S b/arch/s390/boot/head.S
index 5c42f16a54c4..f721913b73f1 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/boot/head.S
@@ -272,14 +272,14 @@ iplstart:
.org 0x10000
ENTRY(startup)
j .Lep_startup_normal
- .org 0x10008
+ .org EP_OFFSET
#
# This is a list of s390 kernel entry points. At address 0x1000f the number of
# valid entry points is stored.
#
# IMPORTANT: Do not change this table, it is s390 kernel ABI!
#
- .ascii "S390EP"
+ .ascii EP_STRING
.byte 0x00,0x01
#
# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
@@ -310,10 +310,11 @@ ENTRY(startup_kdump)
l %r15,.Lstack-.LPG0(%r13)
ahi %r15,-STACK_FRAME_OVERHEAD
brasl %r14,verify_facilities
-# For uncompressed images, continue in
-# arch/s390/kernel/head64.S. For compressed images, continue in
-# arch/s390/boot/compressed/head.S.
+#ifdef CONFIG_KERNEL_UNCOMPRESSED
jg startup_continue
+#else
+ jg startup_decompressor
+#endif
.Lstack:
.long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/boot/head_kdump.S
index 174d6959bf5b..174d6959bf5b 100644
--- a/arch/s390/kernel/head_kdump.S
+++ b/arch/s390/boot/head_kdump.S
diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S
new file mode 100644
index 000000000000..b33463633f03
--- /dev/null
+++ b/arch/s390/boot/mem.S
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../lib/mem.S"
diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c
new file mode 100644
index 000000000000..5a19fd7020b5
--- /dev/null
+++ b/arch/s390/boot/sclp_early_core.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../drivers/s390/char/sclp_early_core.c"
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index ad47abd08630..c54cb26eb7f5 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -1035,7 +1035,6 @@ static struct aead_alg gcm_aes_aead = {
.chunksize = AES_BLOCK_SIZE,
.base = {
- .cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct s390_aes_ctx),
.cra_priority = 900,
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 3b7f96c9eead..86aed30fad3a 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -128,7 +128,6 @@ static struct shash_alg ghash_alg = {
.cra_name = "ghash",
.cra_driver_name = "ghash-s390",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index a00c17f761c1..009572e8276d 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -78,7 +78,6 @@ static struct shash_alg alg = {
.cra_name = "sha1",
.cra_driver_name= "sha1-s390",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 944aa6b237cd..62833a1d8724 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -71,7 +71,6 @@ static struct shash_alg sha256_alg = {
.cra_name = "sha256",
.cra_driver_name= "sha256-s390",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -108,7 +107,6 @@ static struct shash_alg sha224_alg = {
.cra_name = "sha224",
.cra_driver_name= "sha224-s390",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index b17eded532b1..be589c340d15 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -76,7 +76,6 @@ static struct shash_alg sha512_alg = {
.cra_name = "sha512",
.cra_driver_name= "sha512-s390",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -115,7 +114,6 @@ static struct shash_alg sha384_alg = {
.cra_name = "sha384",
.cra_driver_name= "sha384-s390",
.cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_sha_ctx),
.cra_module = THIS_MODULE,
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index a2945b289a29..3452e18bb1ca 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -497,7 +497,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
}
diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
rc = hypfs_create_str(cpu_dir, "type", buffer);
- return PTR_RET(rc);
+ return PTR_ERR_OR_ZERO(rc);
}
static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
@@ -544,7 +544,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
return PTR_ERR(rc);
diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
rc = hypfs_create_str(cpu_dir, "type", buffer);
- return PTR_RET(rc);
+ return PTR_ERR_OR_ZERO(rc);
}
static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 06b513d192b9..c681329fdeec 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -36,7 +36,7 @@ struct hypfs_sb_info {
kuid_t uid; /* uid used for files and dirs */
kgid_t gid; /* gid used for files and dirs */
struct dentry *update_file; /* file to trigger update */
- time_t last_update; /* last update time in secs since 1970 */
+ time64_t last_update; /* last update, CLOCK_MONOTONIC time */
struct mutex lock; /* lock to protect update process */
};
@@ -52,7 +52,7 @@ static void hypfs_update_update(struct super_block *sb)
struct hypfs_sb_info *sb_info = sb->s_fs_info;
struct inode *inode = d_inode(sb_info->update_file);
- sb_info->last_update = get_seconds();
+ sb_info->last_update = ktime_get_seconds();
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
}
@@ -179,7 +179,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
* to restart data collection in this case.
*/
mutex_lock(&fs_info->lock);
- if (fs_info->last_update == get_seconds()) {
+ if (fs_info->last_update == ktime_get_seconds()) {
rc = -EBUSY;
goto out;
}
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index c1bedb4c8de0..8c00fd509c45 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -47,6 +47,50 @@ struct ap_queue_status {
};
/**
+ * ap_intructions_available() - Test if AP instructions are available.
+ *
+ * Returns true if the AP instructions are installed, otherwise false.
+ */
+static inline bool ap_instructions_available(void)
+{
+ register unsigned long reg0 asm ("0") = AP_MKQID(0, 0);
+ register unsigned long reg1 asm ("1") = 0;
+ register unsigned long reg2 asm ("2") = 0;
+
+ asm volatile(
+ " .long 0xb2af0000\n" /* PQAP(TAPQ) */
+ "0: la %0,1\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (reg1), "+d" (reg2)
+ : "d" (reg0)
+ : "cc");
+ return reg1 != 0;
+}
+
+/**
+ * ap_tapq(): Test adjunct processor queue.
+ * @qid: The AP queue number
+ * @info: Pointer to queue descriptor
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info)
+{
+ register unsigned long reg0 asm ("0") = qid;
+ register struct ap_queue_status reg1 asm ("1");
+ register unsigned long reg2 asm ("2");
+
+ asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */
+ : "=d" (reg1), "=d" (reg2)
+ : "d" (reg0)
+ : "cc");
+ if (info)
+ *info = reg2;
+ return reg1;
+}
+
+/**
* ap_test_queue(): Test adjunct processor queue.
* @qid: The AP queue number
* @tbit: Test facilities bit
@@ -54,10 +98,57 @@ struct ap_queue_status {
*
* Returns AP queue status structure.
*/
-struct ap_queue_status ap_test_queue(ap_qid_t qid,
- int tbit,
- unsigned long *info);
+static inline struct ap_queue_status ap_test_queue(ap_qid_t qid,
+ int tbit,
+ unsigned long *info)
+{
+ if (tbit)
+ qid |= 1UL << 23; /* set T bit*/
+ return ap_tapq(qid, info);
+}
+/**
+ * ap_pqap_rapq(): Reset adjunct processor queue.
+ * @qid: The AP queue number
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
+{
+ register unsigned long reg0 asm ("0") = qid | (1UL << 24);
+ register struct ap_queue_status reg1 asm ("1");
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(RAPQ) */
+ : "=d" (reg1)
+ : "d" (reg0)
+ : "cc");
+ return reg1;
+}
+
+/**
+ * ap_pqap_zapq(): Reset and zeroize adjunct processor queue.
+ * @qid: The AP queue number
+ *
+ * Returns AP queue status structure.
+ */
+static inline struct ap_queue_status ap_zapq(ap_qid_t qid)
+{
+ register unsigned long reg0 asm ("0") = qid | (2UL << 24);
+ register struct ap_queue_status reg1 asm ("1");
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(ZAPQ) */
+ : "=d" (reg1)
+ : "d" (reg0)
+ : "cc");
+ return reg1;
+}
+
+/**
+ * struct ap_config_info - convenience struct for AP crypto
+ * config info as returned by the ap_qci() function.
+ */
struct ap_config_info {
unsigned int apsc : 1; /* S bit */
unsigned int apxa : 1; /* N bit */
@@ -74,50 +165,189 @@ struct ap_config_info {
unsigned char _reserved4[16];
} __aligned(8);
-/*
- * ap_query_configuration(): Fetch cryptographic config info
+/**
+ * ap_qci(): Get AP configuration data
*
- * Returns the ap configuration info fetched via PQAP(QCI).
- * On success 0 is returned, on failure a negative errno
- * is returned, e.g. if the PQAP(QCI) instruction is not
- * available, the return value will be -EOPNOTSUPP.
+ * Returns 0 on success, or -EOPNOTSUPP.
*/
-int ap_query_configuration(struct ap_config_info *info);
+static inline int ap_qci(struct ap_config_info *config)
+{
+ register unsigned long reg0 asm ("0") = 4UL << 24;
+ register unsigned long reg1 asm ("1") = -EOPNOTSUPP;
+ register struct ap_config_info *reg2 asm ("2") = config;
+
+ asm volatile(
+ ".long 0xb2af0000\n" /* PQAP(QCI) */
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (reg1)
+ : "d" (reg0), "d" (reg2)
+ : "cc", "memory");
+
+ return reg1;
+}
/*
* struct ap_qirq_ctrl - convenient struct for easy invocation
- * of the ap_queue_irq_ctrl() function. This struct is passed
- * as GR1 parameter to the PQAP(AQIC) instruction. For details
- * please see the AR documentation.
+ * of the ap_aqic() function. This struct is passed as GR1
+ * parameter to the PQAP(AQIC) instruction. For details please
+ * see the AR documentation.
*/
struct ap_qirq_ctrl {
unsigned int _res1 : 8;
- unsigned int zone : 8; /* zone info */
- unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */
+ unsigned int zone : 8; /* zone info */
+ unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */
unsigned int _res2 : 4;
- unsigned int gisc : 3; /* guest isc field */
+ unsigned int gisc : 3; /* guest isc field */
unsigned int _res3 : 6;
- unsigned int gf : 2; /* gisa format */
+ unsigned int gf : 2; /* gisa format */
unsigned int _res4 : 1;
- unsigned int gisa : 27; /* gisa origin */
+ unsigned int gisa : 27; /* gisa origin */
unsigned int _res5 : 1;
- unsigned int isc : 3; /* irq sub class */
+ unsigned int isc : 3; /* irq sub class */
};
/**
- * ap_queue_irq_ctrl(): Control interruption on a AP queue.
+ * ap_aqic(): Control interruption for a specific AP.
* @qid: The AP queue number
- * @qirqctrl: struct ap_qirq_ctrl, see above
+ * @qirqctrl: struct ap_qirq_ctrl (64 bit value)
* @ind: The notification indicator byte
*
* Returns AP queue status.
+ */
+static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
+ struct ap_qirq_ctrl qirqctrl,
+ void *ind)
+{
+ register unsigned long reg0 asm ("0") = qid | (3UL << 24);
+ register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl;
+ register struct ap_queue_status reg1_out asm ("1");
+ register void *reg2 asm ("2") = ind;
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(AQIC) */
+ : "=d" (reg1_out)
+ : "d" (reg0), "d" (reg1_in), "d" (reg2)
+ : "cc");
+ return reg1_out;
+}
+
+/*
+ * union ap_qact_ap_info - used together with the
+ * ap_aqic() function to provide a convenient way
+ * to handle the ap info needed by the qact function.
+ */
+union ap_qact_ap_info {
+ unsigned long val;
+ struct {
+ unsigned int : 3;
+ unsigned int mode : 3;
+ unsigned int : 26;
+ unsigned int cat : 8;
+ unsigned int : 8;
+ unsigned char ver[2];
+ };
+};
+
+/**
+ * ap_qact(): Query AP combatibility type.
+ * @qid: The AP queue number
+ * @apinfo: On input the info about the AP queue. On output the
+ * alternate AP queue info provided by the qact function
+ * in GR2 is stored in.
*
- * Control interruption on the given AP queue.
- * Just a simple wrapper function for the low level PQAP(AQIC)
- * instruction available for other kernel modules.
+ * Returns AP queue status. Check response_code field for failures.
*/
-struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid,
- struct ap_qirq_ctrl qirqctrl,
- void *ind);
+static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit,
+ union ap_qact_ap_info *apinfo)
+{
+ register unsigned long reg0 asm ("0") = qid | (5UL << 24)
+ | ((ifbit & 0x01) << 22);
+ register unsigned long reg1_in asm ("1") = apinfo->val;
+ register struct ap_queue_status reg1_out asm ("1");
+ register unsigned long reg2 asm ("2");
+
+ asm volatile(
+ ".long 0xb2af0000" /* PQAP(QACT) */
+ : "+d" (reg1_in), "=d" (reg1_out), "=d" (reg2)
+ : "d" (reg0)
+ : "cc");
+ apinfo->val = reg2;
+ return reg1_out;
+}
+
+/**
+ * ap_nqap(): Send message to adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: The program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on NQAP can't happen because the L bit is 1.
+ * Condition code 2 on NQAP also means the send is incomplete,
+ * because a segment boundary was reached. The NQAP is repeated.
+ */
+static inline struct ap_queue_status ap_nqap(ap_qid_t qid,
+ unsigned long long psmid,
+ void *msg, size_t length)
+{
+ register unsigned long reg0 asm ("0") = qid | 0x40000000UL;
+ register struct ap_queue_status reg1 asm ("1");
+ register unsigned long reg2 asm ("2") = (unsigned long) msg;
+ register unsigned long reg3 asm ("3") = (unsigned long) length;
+ register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32);
+ register unsigned long reg5 asm ("5") = psmid & 0xffffffff;
+
+ asm volatile (
+ "0: .long 0xb2ad0042\n" /* NQAP */
+ " brc 2,0b"
+ : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3)
+ : "d" (reg4), "d" (reg5)
+ : "cc", "memory");
+ return reg1;
+}
+
+/**
+ * ap_dqap(): Receive message from adjunct processor queue.
+ * @qid: The AP queue number
+ * @psmid: Pointer to program supplied message identifier
+ * @msg: The message text
+ * @length: The message length
+ *
+ * Returns AP queue status structure.
+ * Condition code 1 on DQAP means the receive has taken place
+ * but only partially. The response is incomplete, hence the
+ * DQAP is repeated.
+ * Condition code 2 on DQAP also means the receive is incomplete,
+ * this time because a segment boundary was reached. Again, the
+ * DQAP is repeated.
+ * Note that gpr2 is used by the DQAP instruction to keep track of
+ * any 'residual' length, in case the instruction gets interrupted.
+ * Hence it gets zeroed before the instruction.
+ */
+static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
+ unsigned long long *psmid,
+ void *msg, size_t length)
+{
+ register unsigned long reg0 asm("0") = qid | 0x80000000UL;
+ register struct ap_queue_status reg1 asm ("1");
+ register unsigned long reg2 asm("2") = 0UL;
+ register unsigned long reg4 asm("4") = (unsigned long) msg;
+ register unsigned long reg5 asm("5") = (unsigned long) length;
+ register unsigned long reg6 asm("6") = 0UL;
+ register unsigned long reg7 asm("7") = 0UL;
+
+
+ asm volatile(
+ "0: .long 0xb2ae0064\n" /* DQAP */
+ " brc 6,0b\n"
+ : "+d" (reg0), "=d" (reg1), "+d" (reg2),
+ "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7)
+ : : "cc", "memory");
+ *psmid = (((unsigned long long) reg6) << 32) + reg7;
+ return reg1;
+}
#endif /* _ASM_S390_AP_H_ */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 4b55532f15c4..fd20ab5d4cf7 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -55,17 +55,9 @@ static inline void atomic_add(int i, atomic_t *v)
__atomic_add(i, &v->counter);
}
-#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
-#define atomic_inc(_v) atomic_add(1, _v)
-#define atomic_inc_return(_v) atomic_add_return(1, _v)
-#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v)
-#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
-#define atomic_dec(_v) atomic_sub(1, _v)
-#define atomic_dec_return(_v) atomic_sub_return(1, _v)
-#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
#define ATOMIC_OPS(op) \
static inline void atomic_##op(int i, atomic_t *v) \
@@ -90,21 +82,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return __atomic_cmpxchg(&v->counter, old, new);
}
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
- int c, old;
- c = atomic_read(v);
- for (;;) {
- if (unlikely(c == u))
- break;
- old = atomic_cmpxchg(v, c, c + a);
- if (likely(old == c))
- break;
- c = old;
- }
- return c;
-}
-
#define ATOMIC64_INIT(i) { (i) }
static inline long atomic64_read(const atomic64_t *v)
@@ -168,50 +145,8 @@ ATOMIC64_OPS(xor)
#undef ATOMIC64_OPS
-static inline int atomic64_add_unless(atomic64_t *v, long i, long u)
-{
- long c, old;
-
- c = atomic64_read(v);
- for (;;) {
- if (unlikely(c == u))
- break;
- old = atomic64_cmpxchg(v, c, c + i);
- if (likely(old == c))
- break;
- c = old;
- }
- return c != u;
-}
-
-static inline long atomic64_dec_if_positive(atomic64_t *v)
-{
- long c, old, dec;
-
- c = atomic64_read(v);
- for (;;) {
- dec = c - 1;
- if (unlikely(dec < 0))
- break;
- old = atomic64_cmpxchg((v), c, dec);
- if (likely(old == c))
- break;
- c = old;
- }
- return dec;
-}
-
-#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0)
-#define atomic64_inc(_v) atomic64_add(1, _v)
-#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
-#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v)
#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v)
#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v)
-#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
-#define atomic64_dec(_v) atomic64_sub(1, _v)
-#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
-#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0)
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
#endif /* __ARCH_S390_ATOMIC__ */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index de023a9a88ca..bf2cbff926ef 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -2,7 +2,7 @@
/*
* CPU-measurement facilities
*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012, 2018
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
@@ -139,8 +139,14 @@ struct hws_trailer_entry {
unsigned char timestamp[16]; /* 16 - 31 timestamp */
unsigned long long reserved1; /* 32 -Reserved */
unsigned long long reserved2; /* */
- unsigned long long progusage1; /* 48 - reserved for programming use */
- unsigned long long progusage2; /* */
+ union { /* 48 - reserved for programming use */
+ struct {
+ unsigned int clock_base:1; /* in progusage2 */
+ unsigned long long progusage1:63;
+ unsigned long long progusage2;
+ };
+ unsigned long long progusage[2];
+ };
} __packed;
/* Load program parameter */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 0563fd3e8458..480bb02ccacd 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -6,36 +6,38 @@
struct css_general_char {
u64 : 12;
- u32 dynio : 1; /* bit 12 */
- u32 : 4;
- u32 eadm : 1; /* bit 17 */
- u32 : 23;
- u32 aif : 1; /* bit 41 */
- u32 : 3;
- u32 mcss : 1; /* bit 45 */
- u32 fcs : 1; /* bit 46 */
- u32 : 1;
- u32 ext_mb : 1; /* bit 48 */
- u32 : 7;
- u32 aif_tdd : 1; /* bit 56 */
- u32 : 1;
- u32 qebsm : 1; /* bit 58 */
- u32 : 2;
- u32 aiv : 1; /* bit 61 */
- u32 : 5;
- u32 aif_osa : 1; /* bit 67 */
- u32 : 12;
- u32 eadm_rf : 1; /* bit 80 */
- u32 : 1;
- u32 cib : 1; /* bit 82 */
- u32 : 5;
- u32 fcx : 1; /* bit 88 */
- u32 : 19;
- u32 alt_ssi : 1; /* bit 108 */
- u32 : 1;
- u32 narf : 1; /* bit 110 */
- u32 : 12;
- u32 util_str : 1;/* bit 123 */
+ u64 dynio : 1; /* bit 12 */
+ u64 : 4;
+ u64 eadm : 1; /* bit 17 */
+ u64 : 23;
+ u64 aif : 1; /* bit 41 */
+ u64 : 3;
+ u64 mcss : 1; /* bit 45 */
+ u64 fcs : 1; /* bit 46 */
+ u64 : 1;
+ u64 ext_mb : 1; /* bit 48 */
+ u64 : 7;
+ u64 aif_tdd : 1; /* bit 56 */
+ u64 : 1;
+ u64 qebsm : 1; /* bit 58 */
+ u64 : 2;
+ u64 aiv : 1; /* bit 61 */
+ u64 : 2;
+
+ u64 : 3;
+ u64 aif_osa : 1; /* bit 67 */
+ u64 : 12;
+ u64 eadm_rf : 1; /* bit 80 */
+ u64 : 1;
+ u64 cib : 1; /* bit 82 */
+ u64 : 5;
+ u64 fcx : 1; /* bit 88 */
+ u64 : 19;
+ u64 alt_ssi : 1; /* bit 108 */
+ u64 : 1;
+ u64 narf : 1; /* bit 110 */
+ u64 : 12;
+ u64 util_str : 1;/* bit 123 */
} __packed;
extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index cfccc0edd00d..8ea270fdc7fb 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -4,7 +4,7 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
#define MCOUNT_INSN_SIZE 6
#else
#define MCOUNT_INSN_SIZE 24
@@ -42,7 +42,7 @@ struct ftrace_insn {
static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
{
#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
/* brcl 0,0 */
insn->opc = 0xc004;
insn->disp = 0;
@@ -57,7 +57,7 @@ static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
static inline int is_ftrace_nop(struct ftrace_insn *insn)
{
#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
if (insn->disp == 0)
return 1;
#else
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index e07cce88dfb0..fcbd638fb9f4 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,14 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
+/* Generic bits for GMAP notification on DAT table entry changes. */
+#define GMAP_NOTIFY_SHADOW 0x2
+#define GMAP_NOTIFY_MPROT 0x1
+
+/* Status bits only for huge segment entries */
+#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
+
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
@@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
int gmap_mprotect_notify(struct gmap *, unsigned long start,
unsigned long len, int prot);
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr);
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 9c5fc50204dd..2d1afa58a4b6 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
-#define arch_clear_hugepage_flags(page) do { } while (0)
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+ clear_bit(PG_arch_1, &page->flags);
+}
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 13de80cf741c..b106aa29bf55 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -68,8 +68,6 @@ struct kprobe_ctlblk {
unsigned long kprobe_saved_imask;
unsigned long kprobe_saved_ctl[3];
struct prev_kprobe prev_kprobe;
- struct pt_regs jprobe_saved_regs;
- kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
};
void arch_remove_kprobe(struct kprobe *p);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a2188e309bd6..29c940bf8506 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
__u8 reserved1c0[8]; /* 0x01c0 */
#define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
seqcount_t cputm_seqcount;
__u64 cputm_start;
bool gs_enabled;
+ bool skey_enabled;
};
struct kvm_vm_stat {
@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
-struct kvm_s390_migration_state {
- unsigned long bitmap_size; /* in bits (number of guest pages) */
- atomic64_t dirty_pages; /* number of dirty pages */
- unsigned long *pgste_bitmap;
-};
-
struct kvm_arch{
void *sca;
int use_esca;
@@ -828,7 +824,8 @@ struct kvm_arch{
struct kvm_s390_vsie vsie;
u8 epdx;
u64 epoch;
- struct kvm_s390_migration_state *migration_state;
+ int migration_mode;
+ atomic64_t cmma_dirty_pages;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
struct kvm_s390_gisa *gisa;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5bc888841eaf..406d940173ab 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -185,7 +185,7 @@ struct lowcore {
/* Transaction abort diagnostic block */
__u8 pgm_tdb[256]; /* 0x1800 */
__u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
-} __packed;
+} __packed __aligned(8192);
#define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index f5ff9dbad8ac..f31a15044c24 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -24,6 +24,8 @@ typedef struct {
unsigned int uses_skeys:1;
/* The mmu context uses CMM. */
unsigned int uses_cmm:1;
+ /* The gmaps associated with this context are allowed to use huge pages. */
+ unsigned int allow_gmap_hpage_1m:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index d16bc79c30bb..0717ee76885d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
+ mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
case _REGION2_SIZE:
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index a01f81186e86..123dac3717b3 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -8,7 +8,7 @@
#ifdef __ASSEMBLY__
-#ifdef CONFIG_EXPOLINE
+#ifdef CC_USING_EXPOLINE
_LC_BR_R1 = __LC_BR_R1
@@ -189,7 +189,7 @@ _LC_BR_R1 = __LC_BR_R1
.macro BASR_EX rsave,rtarget,ruse=%r1
basr \rsave,\rtarget
.endm
-#endif
+#endif /* CC_USING_EXPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 94f8db468c9b..10fe982f2b4b 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -51,6 +51,10 @@ struct zpci_fmb_fmt2 {
u64 max_work_units;
};
+struct zpci_fmb_fmt3 {
+ u64 tx_bytes;
+};
+
struct zpci_fmb {
u32 format : 8;
u32 fmt_ind : 24;
@@ -66,6 +70,7 @@ struct zpci_fmb {
struct zpci_fmb_fmt0 fmt0;
struct zpci_fmb_fmt1 fmt1;
struct zpci_fmb_fmt2 fmt2;
+ struct zpci_fmb_fmt3 fmt3;
};
} __packed __aligned(128);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ab636089c60..0e7cb0dc9c33 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr)
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
/* Bits in the segment table entry */
-#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
+#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
+#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
@@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
pte_t *sptep, pte_t *tptep, pte_t pte);
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
-bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
+ pte_t *ptep);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char key, bool nq);
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
@@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
unsigned long *oldpte, unsigned long *oldpgste);
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
/*
* Certain architectures need to do special things when PTEs
diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h
index 6090670df51f..e297bcfc476f 100644
--- a/arch/s390/include/asm/purgatory.h
+++ b/arch/s390/include/asm/purgatory.h
@@ -13,11 +13,5 @@
int verify_sha256_digest(void);
-extern u64 kernel_entry;
-extern u64 kernel_type;
-
-extern u64 crash_start;
-extern u64 crash_size;
-
#endif /* __ASSEMBLY__ */
#endif /* _S390_PURGATORY_H_ */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index de11ecc99c7c..9c9970a5dfb1 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -262,7 +262,6 @@ struct qdio_outbuf_state {
void *user;
};
-#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00
#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01
#define CHSC_AC1_INITIATE_INPUTQ 0x80
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 54f81f8ed662..724faede8ac5 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -4,6 +4,4 @@
#include <asm-generic/sections.h>
-extern char _ehead[];
-
#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 9c30ebe046f3..1d66016f4170 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -9,8 +9,10 @@
#include <linux/const.h>
#include <uapi/asm/setup.h>
-
+#define EP_OFFSET 0x10008
+#define EP_STRING "S390EP"
#define PARMAREA 0x10400
+#define PARMAREA_END 0x11000
/*
* Machine features detected in early.c
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
index dc329aa03f76..83a574e95b3a 100644
--- a/arch/s390/include/uapi/asm/chsc.h
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -23,29 +23,29 @@ struct chsc_async_header {
__u32 key : 4;
__u32 : 28;
struct subchannel_id sid;
-} __attribute__ ((packed));
+};
struct chsc_async_area {
struct chsc_async_header header;
__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
-} __attribute__ ((packed));
+};
struct chsc_header {
__u16 length;
__u16 code;
-} __attribute__ ((packed));
+};
struct chsc_sync_area {
struct chsc_header header;
__u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
-} __attribute__ ((packed));
+};
struct chsc_response_struct {
__u16 length;
__u16 code;
__u32 parms;
__u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
-} __attribute__ ((packed));
+};
struct chsc_chp_cd {
struct chp_id chpid;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..9a50f02b9894 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
/*
* KVM s390 specific structures and definitions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
struct {
__u64 reserved1[2];
__u64 gscb[4];
+ __u64 etoken;
+ __u64 etoken_extension;
};
};
};
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 3510c0fd06f4..39d901476ee5 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -111,4 +111,7 @@
#define SO_ZEROCOPY 60
+#define SO_TXTIME 61
+#define SCM_TXTIME SO_TXTIME
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index b62e0614e440..2bb1f3bb98ac 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -32,12 +32,12 @@
* - length(n_modulus) = inputdatalength
*/
struct ica_rsa_modexpo {
- char __user * inputdata;
- unsigned int inputdatalength;
- char __user * outputdata;
- unsigned int outputdatalength;
- char __user * b_key;
- char __user * n_modulus;
+ char __user *inputdata;
+ unsigned int inputdatalength;
+ char __user *outputdata;
+ unsigned int outputdatalength;
+ char __user *b_key;
+ char __user *n_modulus;
};
/**
@@ -55,15 +55,15 @@ struct ica_rsa_modexpo {
* - length(u_mult_inv) = inputdatalength/2 + 8
*/
struct ica_rsa_modexpo_crt {
- char __user * inputdata;
- unsigned int inputdatalength;
- char __user * outputdata;
- unsigned int outputdatalength;
- char __user * bp_key;
- char __user * bq_key;
- char __user * np_prime;
- char __user * nq_prime;
- char __user * u_mult_inv;
+ char __user *inputdata;
+ unsigned int inputdatalength;
+ char __user *outputdata;
+ unsigned int outputdatalength;
+ char __user *bp_key;
+ char __user *bq_key;
+ char __user *np_prime;
+ char __user *nq_prime;
+ char __user *u_mult_inv;
};
/**
@@ -93,18 +93,18 @@ struct CPRBX {
unsigned int req_extbl; /* request extension block len */
unsigned char pad_001[4]; /* reserved */
unsigned int rpld_extbl; /* replied extension block len */
- unsigned char padx000[16 - sizeof (char *)];
- unsigned char * req_parmb; /* request parm block 'address' */
- unsigned char padx001[16 - sizeof (char *)];
- unsigned char * req_datab; /* request data block 'address' */
- unsigned char padx002[16 - sizeof (char *)];
- unsigned char * rpl_parmb; /* reply parm block 'address' */
- unsigned char padx003[16 - sizeof (char *)];
- unsigned char * rpl_datab; /* reply data block 'address' */
- unsigned char padx004[16 - sizeof (char *)];
- unsigned char * req_extb; /* request extension block 'addr'*/
- unsigned char padx005[16 - sizeof (char *)];
- unsigned char * rpl_extb; /* reply extension block 'address'*/
+ unsigned char padx000[16 - sizeof(char *)];
+ unsigned char *req_parmb; /* request parm block 'address' */
+ unsigned char padx001[16 - sizeof(char *)];
+ unsigned char *req_datab; /* request data block 'address' */
+ unsigned char padx002[16 - sizeof(char *)];
+ unsigned char *rpl_parmb; /* reply parm block 'address' */
+ unsigned char padx003[16 - sizeof(char *)];
+ unsigned char *rpl_datab; /* reply data block 'address' */
+ unsigned char padx004[16 - sizeof(char *)];
+ unsigned char *req_extb; /* request extension block 'addr'*/
+ unsigned char padx005[16 - sizeof(char *)];
+ unsigned char *rpl_extb; /* reply extension block 'address'*/
unsigned short ccp_rtcode; /* server return code */
unsigned short ccp_rscode; /* server reason code */
unsigned int mac_data_len; /* Mac Data Length */
@@ -127,17 +127,17 @@ struct ica_xcRB {
unsigned int user_defined;
unsigned short request_ID;
unsigned int request_control_blk_length;
- unsigned char padding1[16 - sizeof (char *)];
- char __user * request_control_blk_addr;
+ unsigned char padding1[16 - sizeof(char *)];
+ char __user *request_control_blk_addr;
unsigned int request_data_length;
- char padding2[16 - sizeof (char *)];
- char __user * request_data_address;
+ char padding2[16 - sizeof(char *)];
+ char __user *request_data_address;
unsigned int reply_control_blk_length;
- char padding3[16 - sizeof (char *)];
- char __user * reply_control_blk_addr;
+ char padding3[16 - sizeof(char *)];
+ char __user *reply_control_blk_addr;
unsigned int reply_data_length;
- char padding4[16 - sizeof (char *)];
- char __user * reply_data_addr;
+ char padding4[16 - sizeof(char *)];
+ char __user *reply_data_addr;
unsigned short priority_window;
unsigned int status;
} __attribute__((packed));
@@ -233,7 +233,7 @@ struct zcrypt_device_matrix_ext {
struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
};
-#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
+#define AUTOSELECT 0xFFFFFFFF
#define ZCRYPT_IOCTL_MAGIC 'z'
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2fed39b26b42..dbfd1730e631 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -9,39 +9,21 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
-CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE)
endif
-GCOV_PROFILE_als.o := n
GCOV_PROFILE_early.o := n
GCOV_PROFILE_early_nobss.o := n
-KCOV_INSTRUMENT_als.o := n
KCOV_INSTRUMENT_early.o := n
KCOV_INSTRUMENT_early_nobss.o := n
-UBSAN_SANITIZE_als.o := n
UBSAN_SANITIZE_early.o := n
UBSAN_SANITIZE_early_nobss.o := n
#
-# Use -march=z900 for als.c to be able to print an error
-# message if the kernel is started on a machine which is too old
-#
-ifneq ($(CC_FLAGS_MARCH),-march=z900)
-CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
-CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE)
-CFLAGS_als.o += -march=z900
-AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
-AFLAGS_head.o += -march=z900
-endif
-
-CFLAGS_als.o += -D__NO_FORTIFY
-
-#
# Passing null pointers is ok for smp code, since we access the lowcore here.
#
CFLAGS_smp.o := -Wno-nonnull
@@ -61,13 +43,13 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o
-extra-y += head.o head64.o vmlinux.lds
+extra-y += head64.o vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
@@ -99,5 +81,5 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-y += vdso64/
obj-$(CONFIG_COMPAT) += vdso32/
-chkbss := head.o head64.o als.o early_nobss.o
+chkbss := head64.o early_nobss.o
include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 11aea745a2a6..66e830f1c7bf 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -17,14 +17,6 @@
#include <asm/gmap.h>
#include <asm/nmi.h>
-/*
- * Make sure that the compiler is new enough. We want a compiler that
- * is known to work with the "Q" assembler constraint.
- */
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
-#error Your compiler is too old; please use version 4.3 or newer
-#endif
-
int main(void)
{
/* task struct offsets */
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 607c5e9fba3d..2ce28bf0c5ec 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -183,3 +183,4 @@ COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags);
COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags)
+COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 9f5ea9d87069..376f6b6dfb3c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -293,17 +293,13 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
prot);
}
-/*
- * Alloc memory and panic in case of ENOMEM
- */
-static void *kzalloc_panic(int len)
+static const char *nt_name(Elf64_Word type)
{
- void *rc;
+ const char *name = "LINUX";
- rc = kzalloc(len, GFP_KERNEL);
- if (!rc)
- panic("s390 kdump kzalloc (%d) failed", len);
- return rc;
+ if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
+ name = KEXEC_CORE_NOTE_NAME;
+ return name;
}
/*
@@ -332,11 +328,26 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
{
- const char *note_name = "LINUX";
+ return nt_init_name(buf, type, desc, d_len, nt_name(type));
+}
- if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
- note_name = KEXEC_CORE_NOTE_NAME;
- return nt_init_name(buf, type, desc, d_len, note_name);
+/*
+ * Calculate the size of ELF note
+ */
+static size_t nt_size_name(int d_len, const char *name)
+{
+ size_t size;
+
+ size = sizeof(Elf64_Nhdr);
+ size += roundup(strlen(name) + 1, 4);
+ size += roundup(d_len, 4);
+
+ return size;
+}
+
+static inline size_t nt_size(Elf64_Word type, int d_len)
+{
+ return nt_size_name(d_len, nt_name(type));
}
/*
@@ -375,6 +386,29 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
}
/*
+ * Calculate size of ELF notes per cpu
+ */
+static size_t get_cpu_elf_notes_size(void)
+{
+ struct save_area *sa = NULL;
+ size_t size;
+
+ size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
+ size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
+ size += nt_size(NT_S390_TIMER, sizeof(sa->timer));
+ size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
+ size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
+ size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
+ size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
+ if (MACHINE_HAS_VX) {
+ size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
+ size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+ }
+
+ return size;
+}
+
+/*
* Initialize prpsinfo note (new kernel)
*/
static void *nt_prpsinfo(void *ptr)
@@ -404,11 +438,15 @@ static void *get_vmcoreinfo_old(unsigned long *size)
if (copy_oldmem_kernel(nt_name, addr + sizeof(note),
sizeof(nt_name) - 1))
return NULL;
- if (strcmp(nt_name, "VMCOREINFO") != 0)
+ if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0)
return NULL;
- vmcoreinfo = kzalloc_panic(note.n_descsz);
- if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz))
+ vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL);
+ if (!vmcoreinfo)
return NULL;
+ if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) {
+ kfree(vmcoreinfo);
+ return NULL;
+ }
*size = note.n_descsz;
return vmcoreinfo;
}
@@ -418,15 +456,38 @@ static void *get_vmcoreinfo_old(unsigned long *size)
*/
static void *nt_vmcoreinfo(void *ptr)
{
+ const char *name = VMCOREINFO_NOTE_NAME;
unsigned long size;
void *vmcoreinfo;
vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
- if (!vmcoreinfo)
- vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (vmcoreinfo)
+ return nt_init_name(ptr, 0, vmcoreinfo, size, name);
+
+ vmcoreinfo = get_vmcoreinfo_old(&size);
if (!vmcoreinfo)
return ptr;
- return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
+ ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name);
+ kfree(vmcoreinfo);
+ return ptr;
+}
+
+static size_t nt_vmcoreinfo_size(void)
+{
+ const char *name = VMCOREINFO_NOTE_NAME;
+ unsigned long size;
+ void *vmcoreinfo;
+
+ vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+ if (vmcoreinfo)
+ return nt_size_name(size, name);
+
+ vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (!vmcoreinfo)
+ return 0;
+
+ kfree(vmcoreinfo);
+ return nt_size_name(size, name);
}
/*
@@ -539,6 +600,27 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
return ptr;
}
+static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+{
+ size_t size;
+
+ size = sizeof(Elf64_Ehdr);
+ /* PT_NOTES */
+ size += sizeof(Elf64_Phdr);
+ /* nt_prpsinfo */
+ size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+ /* regsets */
+ size += get_cpu_cnt() * get_cpu_elf_notes_size();
+ /* nt_vmcoreinfo */
+ size += nt_vmcoreinfo_size();
+ /* nt_final */
+ size += sizeof(Elf64_Nhdr);
+ /* PT_LOADS */
+ size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+
+ return size;
+}
+
/*
* Create ELF core header (new kernel)
*/
@@ -566,9 +648,17 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
mem_chunk_cnt = get_mem_chunk_cnt();
- alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 +
- mem_chunk_cnt * sizeof(Elf64_Phdr);
- hdr = kzalloc_panic(alloc_size);
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+
+ hdr = kzalloc(alloc_size, GFP_KERNEL);
+
+ /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ * a dump with this crash kernel will fail. Panic now to allow other
+ * dump mechanisms to take over.
+ */
+ if (!hdr)
+ panic("s390 kdump allocating elfcorehdr failed");
+
/* Init elf header */
ptr = ehdr_init(hdr, mem_chunk_cnt);
/* Init program headers */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 827699eb48fa..5b28b434f8a1 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -331,8 +331,20 @@ static void __init setup_boot_command_line(void)
append_to_cmdline(append_ipl_scpdata);
}
+static void __init check_image_bootable(void)
+{
+ if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING)))
+ return;
+
+ sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
+ sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n");
+ sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n");
+ disabled_wait(0xbadb007);
+}
+
void __init startup_init(void)
{
+ check_image_bootable();
time_early_init();
init_kernel_storage_key();
lockdep_off();
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
index c15caeab1dbf..7f8246c9be08 100644
--- a/arch/s390/kernel/ebcdic.c
+++ b/arch/s390/kernel/ebcdic.c
@@ -111,15 +111,15 @@ __u8 _ebcasc[256] =
0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
/* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
- /* 0x40 SP RSP ä ---- */
+ /* 0x40 SP RSP ä ---- */
0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
/* 0x48 . < ( + | */
0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
/* 0x50 & ---- */
0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
- /* 0x58 ß ! $ * ) ; */
+ /* 0x58 ß ! $ * ) ; */
0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
- /* 0x60 - / ---- Ä ---- ---- ---- */
+ /* 0x60 - / ---- Ä ---- ---- ---- */
0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
/* 0x68 ---- , % _ > ? */
0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
@@ -131,7 +131,7 @@ __u8 _ebcasc[256] =
0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x88 h i ---- ---- ---- */
0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
- /* 0x90 ° j k l m n o p */
+ /* 0x90 ° j k l m n o p */
0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
/* 0x98 q r ---- ---- */
0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
@@ -139,25 +139,25 @@ __u8 _ebcasc[256] =
0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
/* 0xA8 y z ---- ---- ---- ---- */
0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
- /* 0xB0 ^ ---- § ---- */
+ /* 0xB0 ^ ---- § ---- */
0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
/* 0xB8 ---- [ ] ---- ---- ---- ---- */
0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
/* 0xC0 { A B C D E F G */
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- /* 0xC8 H I ---- ö ---- */
+ /* 0xC8 H I ---- ö ---- */
0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
/* 0xD0 } J K L M N O P */
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
- /* 0xD8 Q R ---- ü */
+ /* 0xD8 Q R ---- ü */
0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
/* 0xE0 \ S T U V W X */
0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
/* 0xF0 0 1 2 3 4 5 6 7 */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
};
@@ -260,15 +260,15 @@ __u8 _ebcasc_500[256] =
0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
/* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
- /* 0x40 SP RSP ä ---- */
+ /* 0x40 SP RSP ä ---- */
0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
/* 0x48 [ . < ( + ! */
0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
/* 0x50 & ---- */
0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
- /* 0x58 ß ] $ * ) ; ^ */
+ /* 0x58 ß ] $ * ) ; ^ */
0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
- /* 0x60 - / ---- Ä ---- ---- ---- */
+ /* 0x60 - / ---- Ä ---- ---- ---- */
0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
/* 0x68 ---- , % _ > ? */
0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
@@ -280,7 +280,7 @@ __u8 _ebcasc_500[256] =
0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x88 h i ---- ---- ---- */
0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
- /* 0x90 ° j k l m n o p */
+ /* 0x90 ° j k l m n o p */
0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
/* 0x98 q r ---- ---- */
0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
@@ -288,25 +288,25 @@ __u8 _ebcasc_500[256] =
0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
/* 0xA8 y z ---- ---- ---- ---- */
0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
- /* 0xB0 ---- § ---- */
+ /* 0xB0 ---- § ---- */
0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
/* 0xB8 ---- | ---- ---- ---- ---- */
0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
/* 0xC0 { A B C D E F G */
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- /* 0xC8 H I ---- ö ---- */
+ /* 0xC8 H I ---- ö ---- */
0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
/* 0xD0 } J K L M N O P */
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
- /* 0xD8 Q R ---- ü */
+ /* 0xD8 Q R ---- ü */
0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
/* 0xE0 \ S T U V W X */
0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
/* 0xF0 0 1 2 3 4 5 6 7 */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
};
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f03402efab4b..150130c897c3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -357,6 +357,10 @@ ENTRY(system_call)
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_return:
+#ifdef CONFIG_DEBUG_RSEQ
+ lgr %r2,%r11
+ brasl %r14,rseq_syscall
+#endif
LOCKDEP_SYS_EXIT
.Lsysc_tif:
TSTMSK __PT_FLAGS(%r11),_PIF_WORK
@@ -1265,7 +1269,7 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
-0: BR_EX %r14
+0: BR_EX %r14,%r11
.align 8
.Lcleanup_table:
@@ -1301,7 +1305,7 @@ cleanup_critical:
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
- BR_EX %r14
+ BR_EX %r14,%r11
#endif
.Lcleanup_system_call:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 961abfac2c5f..472fa2f1a4a5 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user
DECLARE_PER_CPU(u64, mt_cycles[8]);
-void verify_facilities(void);
void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index dc76d813e420..84be7f02d0c2 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -61,7 +61,7 @@ unsigned long ftrace_plt;
static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
{
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
/* brcl 0,0 */
insn->opc = 0xc004;
insn->disp = 0;
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 791cb9000e86..6d14ad42ba88 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -48,11 +48,23 @@ ENTRY(startup_continue)
# Early machine initialization and detection functions.
#
brasl %r14,startup_init
- lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space,
- # virtual and never return ...
+
+# check control registers
+ stctg %c0,%c15,0(%r15)
+ oi 6(%r15),0x60 # enable sigp emergency & external call
+ oi 4(%r15),0x10 # switch on low address proctection
+ lctlg %c0,%c15,0(%r15)
+
+ lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess
+ brasl %r14,start_kernel # go to C code
+#
+# We returned from start_kernel ?!? PANIK
+#
+ basr %r13,0
+ lpswe .Ldw-.(%r13) # load disabled wait psw
+
.align 16
.LPG1:
-.Lentry:.quad 0x0000000180000000,_stext
.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
.quad 0 # cr1: primary space segment table
.quad .Lduct # cr2: dispatchable unit control table
@@ -85,30 +97,5 @@ ENTRY(startup_continue)
.endr
.Llinkage_stack:
.long 0,0,0x89000000,0,0,0,0x8a000000,0
-
-ENTRY(_ehead)
-
- .org 0x100000 - 0x11000 # head.o ends at 0x11000
-#
-# startup-code, running in absolute addressing mode
-#
-ENTRY(_stext)
- basr %r13,0 # get base
-.LPG3:
-# check control registers
- stctg %c0,%c15,0(%r15)
- oi 6(%r15),0x60 # enable sigp emergency & external call
- oi 4(%r15),0x10 # switch on low address proctection
- lctlg %c0,%c15,0(%r15)
-
- lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess
- brasl %r14,start_kernel # go to C code
-#
-# We returned from start_kernel ?!? PANIK
-#
- basr %r13,0
- lpswe .Ldw-.(%r13) # load disabled wait psw
-
- .align 8
.Ldw: .quad 0x0002000180000000,0x0000000000000000
.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 60f60afa645c..7c0a095e9c5f 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -321,38 +321,20 @@ static int kprobe_handler(struct pt_regs *regs)
* If we have no pre-handler or it returned 0, we
* continue with single stepping. If we have a
* pre-handler and it returned non-zero, it prepped
- * for calling the break_handler below on re-entry
- * for jprobe processing, so get out doing nothing
- * more here.
+ * for changing execution path, so get out doing
+ * nothing more here.
*/
push_kprobe(kcb, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (p->pre_handler && p->pre_handler(p, regs))
+ if (p->pre_handler && p->pre_handler(p, regs)) {
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
return 1;
+ }
kcb->kprobe_status = KPROBE_HIT_SS;
}
enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
return 1;
- } else if (kprobe_running()) {
- p = __this_cpu_read(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- /*
- * Continuation after the jprobe completed and
- * caused the jprobe_return trap. The jprobe
- * break_handler "returns" to the original
- * function that still has the kprobe breakpoint
- * installed. We continue with single stepping.
- */
- kcb->kprobe_status = KPROBE_HIT_SS;
- enable_singlestep(kcb, regs,
- (unsigned long) p->ainsn.insn);
- return 1;
- } /* else:
- * No kprobe at this address and the current kprobe
- * has no break handler (no jprobe!). The kernel just
- * exploded, let the standard trap handler pick up the
- * pieces.
- */
} /* else:
* No kprobe at this address and no active kprobe. The trap has
* not been caused by a kprobe breakpoint. The race of breakpoint
@@ -452,9 +434,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
regs->psw.addr = orig_ret_address;
- pop_kprobe(get_kprobe_ctlblk());
kretprobe_hash_unlock(current, &flags);
- preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
@@ -661,60 +641,6 @@ int kprobe_exceptions_notify(struct notifier_block *self,
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long stack;
-
- memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
-
- /* setup return addr to the jprobe handler routine */
- regs->psw.addr = (unsigned long) jp->entry;
- regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
-
- /* r15 is the stack pointer */
- stack = (unsigned long) regs->gprs[15];
-
- memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
-
- /*
- * jprobes use jprobe_return() which skips the normal return
- * path of the function, and this messes up the accounting of the
- * function graph tracer to get messed up.
- *
- * Pause function graph tracing while performing the jprobe function.
- */
- pause_graph_tracing();
- return 1;
-}
-NOKPROBE_SYMBOL(setjmp_pre_handler);
-
-void jprobe_return(void)
-{
- asm volatile(".word 0x0002");
-}
-NOKPROBE_SYMBOL(jprobe_return);
-
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long stack;
-
- /* It's OK to start function graph tracing again */
- unpause_graph_tracing();
-
- stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
-
- /* Put the regs back */
- memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
- /* put the stack back */
- memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack));
- preempt_enable_no_resched();
- return 1;
-}
-NOKPROBE_SYMBOL(longjmp_break_handler);
-
static struct kprobe trampoline = {
.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
.pre_handler = trampoline_probe_handler
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 27110f3294ed..e93fbf02490c 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -35,7 +35,7 @@ ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
lgr %r1,%r15
-#ifndef CC_USING_HOTPATCH
+#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
aghi %r0,MCOUNT_RETURN_FIXUP
#endif
aghi %r15,-STACK_FRAME_SIZE
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 18ae7b9c71d6..bdddaae96559 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -35,6 +35,8 @@ early_param("nospec", nospec_setup_early);
static int __init nospec_report(void)
{
+ if (test_facility(156))
+ pr_info("Spectre V2 mitigation: etokens\n");
if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
pr_info("Spectre V2 mitigation: execute trampolines\n");
if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
@@ -56,7 +58,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early);
void __init nospec_auto_detect(void)
{
- if (IS_ENABLED(CC_USING_EXPOLINE)) {
+ if (test_facility(156)) {
+ /*
+ * The machine supports etokens.
+ * Disable expolines and disable nobp.
+ */
+ if (IS_ENABLED(CC_USING_EXPOLINE))
+ nospec_disable = 1;
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ } else if (IS_ENABLED(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 8affad5f18cb..e30e580ae362 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -13,6 +13,8 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ if (test_facility(156))
+ return sprintf(buf, "Mitigation: etokens\n");
if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
return sprintf(buf, "Mitigation: execute trampolines\n");
if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 0292d68e7dde..5c53e977be62 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -2,7 +2,7 @@
/*
* Performance event support for the System z CPU-measurement Sampling Facility
*
- * Copyright IBM Corp. 2013
+ * Copyright IBM Corp. 2013, 2018
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
#define KMSG_COMPONENT "cpum_sf"
@@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
goto out;
/* Update the process ID (see also kernel/events/core.c) */
- data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+ data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
perf_output_sample(&handle, &header, data, event);
@@ -1587,6 +1587,17 @@ static void aux_buffer_free(void *data)
"%lu SDBTs\n", num_sdbt);
}
+static void aux_sdb_init(unsigned long sdb)
+{
+ struct hws_trailer_entry *te;
+
+ te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+
+ /* Save clock base */
+ te->clock_base = 1;
+ memcpy(&te->progusage2, &tod_clock_base[1], 8);
+}
+
/*
* aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
* @cpu: On which to allocate, -1 means current
@@ -1666,6 +1677,7 @@ static void *aux_buffer_setup(int cpu, void **pages, int nr_pages,
/* Tail is the entry in a SDBT */
*tail = (unsigned long)pages[i];
aux->sdb_index[i] = (unsigned long)pages[i];
+ aux_sdb_init((unsigned long)pages[i]);
}
sfb->num_sdb = nr_pages;
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 54e2d634b849..4352a504f235 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -12,9 +12,6 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
{
freg_t fp;
- if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX))
- return 0;
-
if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15)
return regs->gprs[idx];
@@ -33,7 +30,8 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if (idx == PERF_REG_S390_PC)
return regs->psw.addr;
- return regs->gprs[idx];
+ WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX);
+ return 0;
}
#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1))
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index d82a9ec64ea9..c637c12f9e37 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -674,12 +674,12 @@ static void __init reserve_kernel(void)
#ifdef CONFIG_DMA_API_DEBUG
/*
* DMA_API_DEBUG code stumbles over addresses from the
- * range [_ehead, _stext]. Mark the memory as reserved
+ * range [PARMAREA_END, _stext]. Mark the memory as reserved
* so it is not used for CONFIG_DMA_API_DEBUG=y.
*/
memblock_reserve(0, PFN_PHYS(start_pfn));
#else
- memblock_reserve(0, (unsigned long)_ehead);
+ memblock_reserve(0, PARMAREA_END);
memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
- (unsigned long)_stext);
#endif
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 2d2960ab3e10..22f08245aa5d 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -498,7 +498,7 @@ void do_signal(struct pt_regs *regs)
}
/* No longer in a system call */
clear_pt_regs_flag(regs, PIF_SYSCALL);
-
+ rseq_signal_deliver(&ksig, regs);
if (is_compat_task())
handle_signal32(&ksig, oldset, regs);
else
@@ -537,4 +537,5 @@ void do_notify_resume(struct pt_regs *regs)
{
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ rseq_handle_notify_resume(NULL, regs);
}
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index 8ff96c08955f..4d929edc80a6 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -25,15 +25,15 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
$(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
define filechk_syshdr
- $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2"
+ $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
endef
define filechk_sysnr
- $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget))
+ $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
endef
define filechk_syscalls
- $(CONFIG_SHELL) '$(systbl)' -S
+ $(CONFIG_SHELL) '$(systbl)' -S < $<
endef
syshdr_abi_unistd_32 := common,32
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 8b210ead7956..022fc099b628 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -389,3 +389,5 @@
379 common statx sys_statx compat_sys_statx
380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi
381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load
+382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents
+383 common rseq sys_rseq compat_sys_rseq
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 54f5496913fa..12f80d1f0415 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -59,6 +59,8 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2)
}
EXPORT_SYMBOL(stsi);
+#ifdef CONFIG_PROC_FS
+
static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
{
switch (encoding) {
@@ -301,6 +303,8 @@ static int __init sysinfo_create_proc(void)
}
device_initcall(sysinfo_create_proc);
+#endif /* CONFIG_PROC_FS */
+
/*
* Service levels interface.
*/
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index cf561160ea88..e8766beee5ad 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -221,17 +221,22 @@ void read_persistent_clock64(struct timespec64 *ts)
ext_to_timespec64(clk, ts);
}
-void read_boot_clock64(struct timespec64 *ts)
+void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
+ struct timespec64 *boot_offset)
{
unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ struct timespec64 boot_time;
__u64 delta;
delta = initial_leap_seconds + TOD_UNIX_EPOCH;
- memcpy(clk, tod_clock_base, 16);
- *(__u64 *) &clk[1] -= delta;
- if (*(__u64 *) &clk[1] > delta)
+ memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE);
+ *(__u64 *)&clk[1] -= delta;
+ if (*(__u64 *)&clk[1] > delta)
clk[0]--;
- ext_to_timespec64(clk, ts);
+ ext_to_timespec64(clk, &boot_time);
+
+ read_persistent_clock64(wall_time);
+ *boot_offset = timespec64_sub(*wall_time, boot_time);
}
static u64 read_tod_clock(struct clocksource *cs)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 4b6e0397f66d..e8184a15578a 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -579,41 +579,33 @@ early_param("topology", topology_setup);
static int topology_ctl_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- unsigned int len;
+ int enabled = topology_is_enabled();
int new_mode;
- char buf[2];
+ int zero = 0;
+ int one = 1;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &enabled,
+ .maxlen = sizeof(int),
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
+
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
- if (!*lenp || *ppos) {
- *lenp = 0;
- return 0;
- }
- if (!write) {
- strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
- ARRAY_SIZE(buf));
- len = strnlen(buf, ARRAY_SIZE(buf));
- if (len > *lenp)
- len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
- goto out;
- }
- len = *lenp;
- if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
- return -EFAULT;
- if (buf[0] != '0' && buf[0] != '1')
- return -EINVAL;
mutex_lock(&smp_cpu_state_mutex);
- new_mode = topology_get_mode(buf[0] == '1');
+ new_mode = topology_get_mode(enabled);
if (topology_mode != new_mode) {
topology_mode = new_mode;
topology_schedule_update();
}
mutex_unlock(&smp_cpu_state_mutex);
topology_flush_work();
-out:
- *lenp = len;
- *ppos += len;
- return 0;
+
+ return rc;
}
static struct ctl_table topology_ctl_table[] = {
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 09abae40f917..3031cc6dd0ab 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -47,7 +47,7 @@ static struct page **vdso64_pagelist;
*/
unsigned int __read_mostly vdso_enabled = 1;
-static int vdso_fault(const struct vm_special_mapping *sm,
+static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page **vdso_pagelist;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index f0414f52817b..b43f8d33a369 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -19,7 +19,7 @@
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
-ENTRY(startup)
+ENTRY(startup_continue)
jiffies = jiffies_64;
PHDRS {
@@ -30,16 +30,12 @@ PHDRS {
SECTIONS
{
- . = 0x00000000;
+ . = 0x100000;
+ _stext = .; /* Start of text section */
.text : {
/* Text and read-only data */
+ _text = .;
HEAD_TEXT
- /*
- * E.g. perf doesn't like symbols starting at address zero,
- * therefore skip the initial PSW and channel program located
- * at address zero and let _text start at 0x200.
- */
- _text = 0x200;
TEXT_TEXT
SCHED_TEXT
CPUIDLE_TEXT
@@ -47,6 +43,7 @@ SECTIONS
KPROBES_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
+ *(.text.*_indirect_*)
*(.fixup)
*(.gnu.warning)
} :text = 0x0700
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index daa09f89ca2d..fcb55b02990e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1145,7 +1145,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
* yield-candidate.
*/
vcpu->preempted = true;
- swake_up(&vcpu->wq);
+ swake_up_one(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
/*
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3b7a5151b6a5..91ad4a9425c0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -172,6 +172,10 @@ static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
+/* allow 1m huge page guest backing, if !nested */
+static int hpage;
+module_param(hpage, int, 0444);
+MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
* For now we handle at most 16 double words as this is what the s390 base
@@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS_MIGRATION:
r = 1;
break;
+ case KVM_CAP_S390_HPAGE_1M:
+ r = 0;
+ if (hpage)
+ r = 1;
+ break;
case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE;
break;
@@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+ struct kvm_memory_slot *memslot)
{
+ int i;
gfn_t cur_gfn, last_gfn;
- unsigned long address;
+ unsigned long gaddr, vmaddr;
struct gmap *gmap = kvm->arch.gmap;
+ DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
- /* Loop over all guest pages */
+ /* Loop over all guest segments */
+ cur_gfn = memslot->base_gfn;
last_gfn = memslot->base_gfn + memslot->npages;
- for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
- address = gfn_to_hva_memslot(memslot, cur_gfn);
+ for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
+ gaddr = gfn_to_gpa(cur_gfn);
+ vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
+ if (kvm_is_error_hva(vmaddr))
+ continue;
+
+ bitmap_zero(bitmap, _PAGE_ENTRIES);
+ gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
+ for (i = 0; i < _PAGE_ENTRIES; i++) {
+ if (test_bit(i, bitmap))
+ mark_page_dirty(kvm, cur_gfn + i);
+ }
- if (test_and_clear_guest_dirty(gmap->mm, address))
- mark_page_dirty(kvm, cur_gfn);
if (fatal_signal_pending(current))
return;
cond_resched();
@@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
r ? "(not available)" : "(success)");
break;
+ case KVM_CAP_S390_HPAGE_1M:
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus)
+ r = -EBUSY;
+ else if (!hpage || kvm->arch.use_cmma)
+ r = -EINVAL;
+ else {
+ r = 0;
+ kvm->mm->context.allow_gmap_hpage_1m = 1;
+ /*
+ * We might have to create fake 4k page
+ * tables. To avoid that the hardware works on
+ * stale PGSTEs, we emulate these instructions.
+ */
+ kvm->arch.use_skf = 0;
+ kvm->arch.use_pfmfi = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
+ r ? "(not available)" : "(success)");
+ break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
@@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (!sclp.has_cmma)
break;
- ret = -EBUSY;
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
- if (!kvm->created_vcpus) {
+ if (kvm->created_vcpus)
+ ret = -EBUSY;
+ else if (kvm->mm->context.allow_gmap_hpage_1m)
+ ret = -EINVAL;
+ else {
kvm->arch.use_cmma = 1;
/* Not compatible with cmma. */
kvm->arch.use_pfmfi = 0;
@@ -862,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
*/
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
- struct kvm_s390_migration_state *mgs;
struct kvm_memory_slot *ms;
- /* should be the only one */
struct kvm_memslots *slots;
- unsigned long ram_pages;
+ unsigned long ram_pages = 0;
int slotnr;
/* migration mode already enabled */
- if (kvm->arch.migration_state)
+ if (kvm->arch.migration_mode)
return 0;
-
slots = kvm_memslots(kvm);
if (!slots || !slots->used_slots)
return -EINVAL;
- mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
- if (!mgs)
- return -ENOMEM;
- kvm->arch.migration_state = mgs;
-
- if (kvm->arch.use_cmma) {
+ if (!kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 1;
+ return 0;
+ }
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
/*
- * Get the first slot. They are reverse sorted by base_gfn, so
- * the first slot is also the one at the end of the address
- * space. We have verified above that at least one slot is
- * present.
+ * The second half of the bitmap is only used on x86,
+ * and would be wasted otherwise, so we put it to good
+ * use here to keep track of the state of the storage
+ * attributes.
*/
- ms = slots->memslots;
- /* round up so we only use full longs */
- ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
- /* allocate enough bytes to store all the bits */
- mgs->pgste_bitmap = vmalloc(ram_pages / 8);
- if (!mgs->pgste_bitmap) {
- kfree(mgs);
- kvm->arch.migration_state = NULL;
- return -ENOMEM;
- }
-
- mgs->bitmap_size = ram_pages;
- atomic64_set(&mgs->dirty_pages, ram_pages);
- /* mark all the pages in active slots as dirty */
- for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
- ms = slots->memslots + slotnr;
- bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
- }
-
- kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+ ram_pages += ms->npages;
}
+ atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+ kvm->arch.migration_mode = 1;
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
return 0;
}
@@ -919,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
*/
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
- struct kvm_s390_migration_state *mgs;
-
/* migration mode already disabled */
- if (!kvm->arch.migration_state)
+ if (!kvm->arch.migration_mode)
return 0;
- mgs = kvm->arch.migration_state;
- kvm->arch.migration_state = NULL;
-
- if (kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 0;
+ if (kvm->arch.use_cmma)
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
- /* We have to wait for the essa emulation to finish */
- synchronize_srcu(&kvm->srcu);
- vfree(mgs->pgste_bitmap);
- }
- kfree(mgs);
return 0;
}
@@ -961,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
static int kvm_s390_vm_get_migration(struct kvm *kvm,
struct kvm_device_attr *attr)
{
- u64 mig = (kvm->arch.migration_state != NULL);
+ u64 mig = kvm->arch.migration_mode;
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
return -ENXIO;
@@ -1540,6 +1558,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
uint8_t *keys;
uint64_t hva;
int srcu_idx, i, r = 0;
+ bool unlocked;
if (args->flags != 0)
return -EINVAL;
@@ -1564,9 +1583,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
goto out;
+ i = 0;
down_read(&current->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
- for (i = 0; i < args->count; i++) {
+ while (i < args->count) {
+ unlocked = false;
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
@@ -1580,8 +1601,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
}
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
- if (r)
- break;
+ if (r) {
+ r = fixup_user_fault(current, current->mm, hva,
+ FAULT_FLAG_WRITE, &unlocked);
+ if (r)
+ break;
+ }
+ if (!r)
+ i++;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&current->mm->mmap_sem);
@@ -1600,6 +1627,134 @@ out:
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+ int start = 0, end = slots->used_slots;
+ int slot = atomic_read(&slots->lru_slot);
+ struct kvm_memory_slot *memslots = slots->memslots;
+
+ if (gfn >= memslots[slot].base_gfn &&
+ gfn < memslots[slot].base_gfn + memslots[slot].npages)
+ return slot;
+
+ while (start < end) {
+ slot = start + (end - start) / 2;
+
+ if (gfn >= memslots[slot].base_gfn)
+ end = slot;
+ else
+ start = slot + 1;
+ }
+
+ if (gfn >= memslots[start].base_gfn &&
+ gfn < memslots[start].base_gfn + memslots[start].npages) {
+ atomic_set(&slots->lru_slot, start);
+ }
+
+ return start;
+}
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+ args->count = 0;
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ /*
+ * We return an error if the first value was invalid, but we
+ * return successfully if at least one value was copied.
+ */
+ if (kvm_is_error_hva(hva))
+ return args->count ? 0 : -EFAULT;
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ cur_gfn++;
+ }
+
+ return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+ unsigned long cur_gfn)
+{
+ int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+ struct kvm_memory_slot *ms = slots->memslots + slotidx;
+ unsigned long ofs = cur_gfn - ms->base_gfn;
+
+ if (ms->base_gfn + ms->npages <= cur_gfn) {
+ slotidx--;
+ /* If we are above the highest slot, wrap around */
+ if (slotidx < 0)
+ slotidx = slots->used_slots - 1;
+
+ ms = slots->memslots + slotidx;
+ ofs = 0;
+ }
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+ while ((slotidx > 0) && (ofs >= ms->npages)) {
+ slotidx--;
+ ms = slots->memslots + slotidx;
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ }
+ return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *ms;
+
+ cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ args->count = 0;
+ args->start_gfn = cur_gfn;
+ if (!ms)
+ return 0;
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ if (kvm_is_error_hva(hva))
+ return 0;
+ /* Decrement only if we actually flipped the bit to 0 */
+ if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_dec(&kvm->arch.cmma_dirty_pages);
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ /* Save the value */
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ /* If the next bit is too far away, stop. */
+ if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+ return 0;
+ /* If we reached the previous "next", find the next one */
+ if (cur_gfn == next_gfn)
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ /* Reached the end of memory or of the buffer, stop */
+ if ((next_gfn >= mem_end) ||
+ (next_gfn - args->start_gfn >= bufsize))
+ return 0;
+ cur_gfn++;
+ /* Reached the end of the current memslot, take the next one. */
+ if (cur_gfn - ms->base_gfn >= ms->npages) {
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ if (!ms)
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/*
* This function searches for the next page with dirty CMMA attributes, and
* saves the attributes in the buffer up to either the end of the buffer or
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
@@ -1610,22 +1765,18 @@ out:
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
struct kvm_s390_cmma_log *args)
{
- struct kvm_s390_migration_state *s = kvm->arch.migration_state;
- unsigned long bufsize, hva, pgstev, i, next, cur;
- int srcu_idx, peek, r = 0, rr;
- u8 *res;
-
- cur = args->start_gfn;
- i = next = pgstev = 0;
+ unsigned long bufsize;
+ int srcu_idx, peek, ret;
+ u8 *values;
- if (unlikely(!kvm->arch.use_cmma))
+ if (!kvm->arch.use_cmma)
return -ENXIO;
/* Invalid/unsupported flags were specified */
if (args->flags & ~KVM_S390_CMMA_PEEK)
return -EINVAL;
/* Migration mode query, and we are not doing a migration */
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
- if (!peek && !s)
+ if (!peek && !kvm->arch.migration_mode)
return -EINVAL;
/* CMMA is disabled or was not used, or the buffer has length zero */
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
@@ -1633,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
memset(args, 0, sizeof(*args));
return 0;
}
-
- if (!peek) {
- /* We are not peeking, and there are no dirty pages */
- if (!atomic64_read(&s->dirty_pages)) {
- memset(args, 0, sizeof(*args));
- return 0;
- }
- cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
- args->start_gfn);
- if (cur >= s->bitmap_size) /* nothing found, loop back */
- cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
- if (cur >= s->bitmap_size) { /* again! (very unlikely) */
- memset(args, 0, sizeof(*args));
- return 0;
- }
- next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+ /* We are not peeking, and there are no dirty pages */
+ if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+ memset(args, 0, sizeof(*args));
+ return 0;
}
- res = vmalloc(bufsize);
- if (!res)
+ values = vmalloc(bufsize);
+ if (!values)
return -ENOMEM;
- args->start_gfn = cur;
-
down_read(&kvm->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
- while (i < bufsize) {
- hva = gfn_to_hva(kvm, cur);
- if (kvm_is_error_hva(hva)) {
- r = -EFAULT;
- break;
- }
- /* decrement only if we actually flipped the bit to 0 */
- if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
- atomic64_dec(&s->dirty_pages);
- r = get_pgste(kvm->mm, hva, &pgstev);
- if (r < 0)
- pgstev = 0;
- /* save the value */
- res[i++] = (pgstev >> 24) & 0x43;
- /*
- * if the next bit is too far away, stop.
- * if we reached the previous "next", find the next one
- */
- if (!peek) {
- if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
- break;
- if (cur == next)
- next = find_next_bit(s->pgste_bitmap,
- s->bitmap_size, cur + 1);
- /* reached the end of the bitmap or of the buffer, stop */
- if ((next >= s->bitmap_size) ||
- (next >= args->start_gfn + bufsize))
- break;
- }
- cur++;
- }
+ if (peek)
+ ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+ else
+ ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&kvm->mm->mmap_sem);
- args->count = i;
- args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
- rr = copy_to_user((void __user *)args->values, res, args->count);
- if (rr)
- r = -EFAULT;
+ if (kvm->arch.migration_mode)
+ args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+ else
+ args->remaining = 0;
- vfree(res);
- return r;
+ if (copy_to_user((void __user *)args->values, values, args->count))
+ ret = -EFAULT;
+
+ vfree(values);
+ return ret;
}
/*
@@ -2139,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- if (kvm->arch.migration_state) {
- vfree(kvm->arch.migration_state->pgste_bitmap);
- kfree(kvm->arch.migration_state);
- }
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
@@ -2300,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
if (test_kvm_facility(vcpu->kvm, 133))
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/
@@ -2549,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
}
if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF;
-
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
if (vcpu->arch.sie_block->gd) {
vcpu->arch.sie_block->eca |= ECA_AIV;
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
@@ -3467,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
preempt_enable();
}
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
kvm_run->kvm_dirty_regs = 0;
}
@@ -3506,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
__ctl_clear_bit(2, 4);
vcpu->arch.host_gscb = NULL;
}
-
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -4082,6 +4194,11 @@ static int __init kvm_s390_init(void)
return -ENODEV;
}
+ if (nested && hpage) {
+ pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
+ return -EINVAL;
+ }
+
for (i = 0; i < 16; i++)
kvm_s390_fac_base[i] |=
S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index eb0eb60c7be6..d68f10441a16 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc;
- struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
trace_kvm_s390_skey_related_inst(vcpu);
/* Already enabled? */
- if (vcpu->kvm->arch.use_skf &&
- !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
- !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ if (vcpu->arch.skey_enabled)
return 0;
rc = s390_enable_skey();
@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
if (!vcpu->kvm->arch.use_skf)
- sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
else
- sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ vcpu->arch.skey_enabled = true;
return 0;
}
@@ -246,9 +244,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
static int handle_iske(struct kvm_vcpu *vcpu)
{
- unsigned long addr;
+ unsigned long gaddr, vmaddr;
unsigned char key;
int reg1, reg2;
+ bool unlocked;
int rc;
vcpu->stat.instruction_iske++;
@@ -262,18 +261,28 @@ static int handle_iske(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
- addr = kvm_s390_logical_to_effective(vcpu, addr);
- addr = kvm_s390_real_to_abs(vcpu, addr);
- addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
- if (kvm_is_error_hva(addr))
+ gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+ gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+ vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
+retry:
+ unlocked = false;
down_read(&current->mm->mmap_sem);
- rc = get_guest_storage_key(current->mm, addr, &key);
- up_read(&current->mm->mmap_sem);
+ rc = get_guest_storage_key(current->mm, vmaddr, &key);
+
+ if (rc) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ if (!rc) {
+ up_read(&current->mm->mmap_sem);
+ goto retry;
+ }
+ }
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ up_read(&current->mm->mmap_sem);
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
vcpu->run->s.regs.gprs[reg1] |= key;
return 0;
@@ -281,8 +290,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
static int handle_rrbe(struct kvm_vcpu *vcpu)
{
- unsigned long addr;
+ unsigned long vmaddr, gaddr;
int reg1, reg2;
+ bool unlocked;
int rc;
vcpu->stat.instruction_rrbe++;
@@ -296,19 +306,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
- addr = kvm_s390_logical_to_effective(vcpu, addr);
- addr = kvm_s390_real_to_abs(vcpu, addr);
- addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
- if (kvm_is_error_hva(addr))
+ gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
+ gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
+ vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
+ if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
+retry:
+ unlocked = false;
down_read(&current->mm->mmap_sem);
- rc = reset_guest_reference_bit(current->mm, addr);
- up_read(&current->mm->mmap_sem);
+ rc = reset_guest_reference_bit(current->mm, vmaddr);
+ if (rc < 0) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ if (!rc) {
+ up_read(&current->mm->mmap_sem);
+ goto retry;
+ }
+ }
if (rc < 0)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
+ up_read(&current->mm->mmap_sem);
kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
@@ -323,6 +341,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
unsigned long start, end;
unsigned char key, oldkey;
int reg1, reg2;
+ bool unlocked;
int rc;
vcpu->stat.instruction_sske++;
@@ -355,19 +374,28 @@ static int handle_sske(struct kvm_vcpu *vcpu)
}
while (start != end) {
- unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+ unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+ unlocked = false;
- if (kvm_is_error_hva(addr))
+ if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
down_read(&current->mm->mmap_sem);
- rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
+ rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR,
m3 & SSKE_MC);
- up_read(&current->mm->mmap_sem);
- if (rc < 0)
+
+ if (rc < 0) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ rc = !rc ? -EAGAIN : rc;
+ }
+ if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- start += PAGE_SIZE;
+
+ up_read(&current->mm->mmap_sem);
+ if (rc >= 0)
+ start += PAGE_SIZE;
}
if (m3 & (SSKE_MC | SSKE_MR)) {
@@ -948,15 +976,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
while (start != end) {
- unsigned long useraddr;
+ unsigned long vmaddr;
+ bool unlocked = false;
/* Translate guest address to host address */
- useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
- if (kvm_is_error_hva(useraddr))
+ vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+ if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
- if (clear_user((void __user *)useraddr, PAGE_SIZE))
+ if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
@@ -966,14 +995,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
down_read(&current->mm->mmap_sem);
- rc = cond_set_guest_storage_key(current->mm, useraddr,
+ rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
- up_read(&current->mm->mmap_sem);
- if (rc < 0)
+ if (rc < 0) {
+ rc = fixup_user_fault(current, current->mm, vmaddr,
+ FAULT_FLAG_WRITE, &unlocked);
+ rc = !rc ? -EAGAIN : rc;
+ }
+ if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- }
- start += PAGE_SIZE;
+ up_read(&current->mm->mmap_sem);
+ if (rc >= 0)
+ start += PAGE_SIZE;
+ }
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
@@ -987,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0;
}
-static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+/*
+ * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ */
+static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{
- struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
int r1, r2, nappended, entries;
unsigned long gfn, hva, res, pgstev, ptev;
unsigned long *cbrlo;
@@ -1039,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
cbrlo[entries] = gfn << PAGE_SHIFT;
}
- if (orc && gfn < ms->bitmap_size) {
- /* increment only if we are really flipping the bit to 1 */
- if (!test_and_set_bit(gfn, ms->pgste_bitmap))
- atomic64_inc(&ms->dirty_pages);
+ if (orc) {
+ struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
+
+ /* Increment only if we are really flipping the bit */
+ if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
}
return nappended;
@@ -1071,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
: ESSA_SET_STABLE_IF_RESIDENT))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (likely(!vcpu->kvm->arch.migration_state)) {
+ if (!vcpu->kvm->arch.migration_mode) {
/*
* CMMA is enabled in the KVM settings, but is disabled in
* the SIE block and in the mm_context, and we are not doing
@@ -1099,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
- /* Account for the possible extra cbrl entry */
- i = do_essa(vcpu, orc);
+ int srcu_idx;
+
+ down_read(&vcpu->kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ i = __do_essa(vcpu, orc);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ up_read(&vcpu->kvm->mm->mmap_sem);
if (i < 0)
return i;
+ /* Account for the possible extra cbrl entry */
entries += i;
}
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 84c89cb9636f..63844b95c22c 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -2,7 +2,7 @@
/*
* kvm nested virtualization support for s390x
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 139))
scb_s->ecd |= scb_o->ecd & ECD_MEF;
+ /* etoken */
+ if (test_kvm_facility(vcpu->kvm, 156))
+ scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
out:
@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa;
}
- if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+ if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
+ (scb_s->ecd & ECD_ETOKENF)) {
unsigned long sdnxc;
gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* - < 0 if an error occurred
*/
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+ __releases(vcpu->kvm->srcu)
+ __acquires(vcpu->kvm->srcu)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 2311f15be9cf..40c4d59c926e 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -17,7 +17,7 @@
ENTRY(memmove)
ltgr %r4,%r4
lgr %r1,%r2
- bzr %r14
+ jz .Lmemmove_exit
aghi %r4,-1
clgr %r2,%r3
jnh .Lmemmove_forward
@@ -36,6 +36,7 @@ ENTRY(memmove)
.Lmemmove_forward_remainder:
larl %r5,.Lmemmove_mvc
ex %r4,0(%r5)
+.Lmemmove_exit:
BR_EX %r14
.Lmemmove_reverse:
ic %r0,0(%r4,%r3)
@@ -65,7 +66,7 @@ EXPORT_SYMBOL(memmove)
*/
ENTRY(memset)
ltgr %r4,%r4
- bzr %r14
+ jz .Lmemset_exit
ltgr %r3,%r3
jnz .Lmemset_fill
aghi %r4,-1
@@ -80,6 +81,7 @@ ENTRY(memset)
.Lmemset_clear_remainder:
larl %r3,.Lmemset_xc
ex %r4,0(%r3)
+.Lmemset_exit:
BR_EX %r14
.Lmemset_fill:
cghi %r4,1
@@ -115,7 +117,7 @@ EXPORT_SYMBOL(memset)
*/
ENTRY(memcpy)
ltgr %r4,%r4
- bzr %r14
+ jz .Lmemcpy_exit
aghi %r4,-1
srlg %r5,%r4,8
ltgr %r5,%r5
@@ -124,6 +126,7 @@ ENTRY(memcpy)
.Lmemcpy_remainder:
larl %r5,.Lmemcpy_mvc
ex %r4,0(%r5)
+.Lmemcpy_exit:
BR_EX %r14
.Lmemcpy_loop:
mvc 0(256,%r1),0(%r3)
@@ -145,9 +148,9 @@ EXPORT_SYMBOL(memcpy)
.macro __MEMSET bits,bytes,insn
ENTRY(__memset\bits)
ltgr %r4,%r4
- bzr %r14
+ jz .L__memset_exit\bits
cghi %r4,\bytes
- je .L__memset_exit\bits
+ je .L__memset_store\bits
aghi %r4,-(\bytes+1)
srlg %r5,%r4,8
ltgr %r5,%r5
@@ -163,8 +166,9 @@ ENTRY(__memset\bits)
larl %r5,.L__memset_mvc\bits
ex %r4,0(%r5)
BR_EX %r14
-.L__memset_exit\bits:
+.L__memset_store\bits:
\insn %r3,0(%r2)
+.L__memset_exit\bits:
BR_EX %r14
.L__memset_mvc\bits:
mvc \bytes(1,%r1),0(%r1)
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 6cf024eb2085..510a18299196 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -191,12 +191,7 @@ static void cmm_set_timer(void)
del_timer(&cmm_timer);
return;
}
- if (timer_pending(&cmm_timer)) {
- if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ))
- return;
- }
- cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ;
- add_timer(&cmm_timer);
+ mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
}
static void cmm_timer_fn(struct timer_list *unused)
@@ -251,45 +246,42 @@ static int cmm_skip_blanks(char *cp, char **endp)
return str != cp;
}
-static struct ctl_table cmm_table[];
-
static int cmm_pages_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- char buf[16], *p;
- unsigned int len;
- long nr;
+ long nr = cmm_get_pages();
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &nr,
+ .maxlen = sizeof(long),
+ };
+ int rc;
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
+ rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
- if (write) {
- len = *lenp;
- if (copy_from_user(buf, buffer,
- len > sizeof(buf) ? sizeof(buf) : len))
- return -EFAULT;
- buf[sizeof(buf) - 1] = '\0';
- cmm_skip_blanks(buf, &p);
- nr = simple_strtoul(p, &p, 0);
- if (ctl == &cmm_table[0])
- cmm_set_pages(nr);
- else
- cmm_add_timed_pages(nr);
- } else {
- if (ctl == &cmm_table[0])
- nr = cmm_get_pages();
- else
- nr = cmm_get_timed_pages();
- len = sprintf(buf, "%ld\n", nr);
- if (len > *lenp)
- len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
- }
- *lenp = len;
- *ppos += len;
+ cmm_set_pages(nr);
+ return 0;
+}
+
+static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ long nr = cmm_get_timed_pages();
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &nr,
+ .maxlen = sizeof(long),
+ };
+ int rc;
+
+ rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ cmm_add_timed_pages(nr);
return 0;
}
@@ -338,7 +330,7 @@ static struct ctl_table cmm_table[] = {
{
.procname = "cmm_timed_pages",
.mode = 0644,
- .proc_handler = cmm_pages_handler,
+ .proc_handler = cmm_timed_pages_handler,
},
{
.procname = "cmm_timeout",
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 6ad15d3fab81..84111a43ea29 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -80,7 +80,7 @@ struct qin64 {
struct dcss_segment {
struct list_head list;
char dcss_name[8];
- char res_name[15];
+ char res_name[16];
unsigned long start_addr;
unsigned long end;
atomic_t ref_count;
@@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
memcpy(&seg->res_name, seg->dcss_name, 8);
EBCASC(seg->res_name, 8);
seg->res_name[8] = '\0';
- strncat(seg->res_name, " (DCSS)", 7);
+ strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
seg->res->name = seg->res_name;
rc = seg->vm_segtype;
if (rc == SEG_TYPE_SC ||
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e074480d3598..72af23bacbb5 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -341,7 +341,8 @@ static noinline int signal_return(struct pt_regs *regs)
return -EACCES;
}
-static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
+static noinline void do_fault_error(struct pt_regs *regs, int access,
+ vm_fault_t fault)
{
int si_code;
@@ -401,7 +402,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
*/
-static inline int do_exception(struct pt_regs *regs, int access)
+static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
{
struct gmap *gmap;
struct task_struct *tsk;
@@ -411,7 +412,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
unsigned long trans_exc_code;
unsigned long address;
unsigned int flags;
- int fault;
+ vm_fault_t fault;
tsk = current;
/*
@@ -502,6 +503,8 @@ retry:
/* No reason to continue if interrupted by SIGKILL. */
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
fault = VM_FAULT_SIGNAL;
+ if (flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out_up;
goto out;
}
if (unlikely(fault & VM_FAULT_ERROR))
@@ -562,7 +565,8 @@ out:
void do_protection_exception(struct pt_regs *regs)
{
unsigned long trans_exc_code;
- int access, fault;
+ int access;
+ vm_fault_t fault;
trans_exc_code = regs->int_parm_long;
/*
@@ -597,7 +601,8 @@ NOKPROBE_SYMBOL(do_protection_exception);
void do_dat_exception(struct pt_regs *regs)
{
- int access, fault;
+ int access;
+ vm_fault_t fault;
access = VM_READ | VM_EXEC | VM_WRITE;
fault = do_exception(regs, access);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index bc56ec8abcf7..bb44990c8212 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2,8 +2,10 @@
/*
* KVM guest address space mapping code
*
- * Copyright IBM Corp. 2007, 2016
+ * Copyright IBM Corp. 2007, 2016, 2018
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
*/
#include <linux/kernel.h>
@@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
rcu_read_unlock();
}
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
+ unsigned long gaddr);
+
/**
* gmap_link - set up shadow page tables to connect a host to a guest address
* @gmap: pointer to guest mapping meta data structure
@@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
+ u64 unprot;
int rc;
BUG_ON(gmap_is_shadow(gmap));
@@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
return -EFAULT;
pmd = pmd_offset(pud, vmaddr);
VM_BUG_ON(pmd_none(*pmd));
- /* large pmds cannot yet be handled */
- if (pmd_large(*pmd))
+ /* Are we allowed to use huge pages? */
+ if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
return -EFAULT;
/* Link gmap segment table entry location to page table. */
rc = radix_tree_preload(GFP_KERNEL);
@@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
if (*table == _SEGMENT_ENTRY_EMPTY) {
rc = radix_tree_insert(&gmap->host_to_guest,
vmaddr >> PMD_SHIFT, table);
- if (!rc)
- *table = pmd_val(*pmd);
- } else
- rc = 0;
+ if (!rc) {
+ if (pmd_large(*pmd)) {
+ *table = (pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
+ | _SEGMENT_ENTRY_GMAP_UC;
+ } else
+ *table = pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS;
+ }
+ } else if (*table & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
+ unprot = (u64)*table;
+ unprot &= ~_SEGMENT_ENTRY_PROTECT;
+ unprot |= _SEGMENT_ENTRY_GMAP_UC;
+ gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
+ }
spin_unlock(&gmap->guest_table_lock);
spin_unlock(ptl);
radix_tree_preload_end();
@@ -690,6 +708,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
vmaddr |= gaddr & ~PMD_MASK;
/* Find vma in the parent mm */
vma = find_vma(gmap->mm, vmaddr);
+ /*
+ * We do not discard pages that are backed by
+ * hugetlbfs, so we don't have to refault them.
+ */
+ if (vma && is_vm_hugetlb_page(vma))
+ continue;
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
zap_page_range(vma, vmaddr, size);
}
@@ -864,7 +888,128 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
*/
static void gmap_pte_op_end(spinlock_t *ptl)
{
- spin_unlock(ptl);
+ if (ptl)
+ spin_unlock(ptl);
+}
+
+/**
+ * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
+ * and return the pmd pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ *
+ * Returns a pointer to the pmd for a guest address, or NULL
+ */
+static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
+{
+ pmd_t *pmdp;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ spin_lock(&gmap->guest_table_lock);
+ pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+
+ if (!pmdp || pmd_none(*pmdp)) {
+ spin_unlock(&gmap->guest_table_lock);
+ return NULL;
+ }
+
+ /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
+ if (!pmd_large(*pmdp))
+ spin_unlock(&gmap->guest_table_lock);
+ return pmdp;
+}
+
+/**
+ * gmap_pmd_op_end - release the guest_table_lock if needed
+ * @gmap: pointer to the guest mapping meta data structure
+ * @pmdp: pointer to the pmd
+ */
+static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
+{
+ if (pmd_large(*pmdp))
+ spin_unlock(&gmap->guest_table_lock);
+}
+
+/*
+ * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
+ * @pmdp: pointer to the pmd to be protected
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns:
+ * 0 if successfully protected
+ * -EAGAIN if a fixup is needed
+ * -EINVAL if unsupported notifier bits have been specified
+ *
+ * Expected to be called with sg->mm->mmap_sem in read and
+ * guest_table_lock held.
+ */
+static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
+ pmd_t *pmdp, int prot, unsigned long bits)
+{
+ int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
+ int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
+ pmd_t new = *pmdp;
+
+ /* Fixup needed */
+ if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
+ return -EAGAIN;
+
+ if (prot == PROT_NONE && !pmd_i) {
+ pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (prot == PROT_READ && !pmd_p) {
+ pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
+ pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (bits & GMAP_NOTIFY_MPROT)
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
+
+ /* Shadow GMAP protection needs split PMDs */
+ if (bits & GMAP_NOTIFY_SHADOW)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * gmap_protect_pte - remove access rights to memory and set pgste bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @pmdp: pointer to the pmd associated with the pte
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EAGAIN if a fixup is needed.
+ *
+ * Expected to be called with sg->mm->mmap_sem in read
+ */
+static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
+ pmd_t *pmdp, int prot, unsigned long bits)
+{
+ int rc;
+ pte_t *ptep;
+ spinlock_t *ptl = NULL;
+ unsigned long pbits = 0;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return -EAGAIN;
+
+ ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
+ if (!ptep)
+ return -ENOMEM;
+
+ pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
+ pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
+ /* Protect and unlock. */
+ rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
+ gmap_pte_op_end(ptl);
+ return rc;
}
/*
@@ -883,30 +1028,45 @@ static void gmap_pte_op_end(spinlock_t *ptl)
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
{
- unsigned long vmaddr;
- spinlock_t *ptl;
- pte_t *ptep;
+ unsigned long vmaddr, dist;
+ pmd_t *pmdp;
int rc;
BUG_ON(gmap_is_shadow(gmap));
while (len) {
rc = -EAGAIN;
- ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
- if (ptep) {
- rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits);
- gmap_pte_op_end(ptl);
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (pmdp) {
+ if (!pmd_large(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
+ bits);
+ if (!rc) {
+ len -= PAGE_SIZE;
+ gaddr += PAGE_SIZE;
+ }
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
+ bits);
+ if (!rc) {
+ dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
+ len = len < dist ? 0 : len - dist;
+ gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
}
if (rc) {
+ if (rc == -EINVAL)
+ return rc;
+
+ /* -EAGAIN, fixup of userspace mm and gmap */
vmaddr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(vmaddr))
return vmaddr;
rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
if (rc)
return rc;
- continue;
}
- gaddr += PAGE_SIZE;
- len -= PAGE_SIZE;
}
return 0;
}
@@ -935,7 +1095,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
return -EINVAL;
down_read(&gmap->mm->mmap_sem);
- rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT);
+ rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
up_read(&gmap->mm->mmap_sem);
return rc;
}
@@ -1474,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
unsigned long limit;
int rc;
+ BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
BUG_ON(gmap_is_shadow(parent));
spin_lock(&parent->shadow_lock);
sg = gmap_find_shadow(parent, asce, edat_level);
@@ -1526,7 +1687,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
down_read(&parent->mm->mmap_sem);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
- PROT_READ, PGSTE_VSIE_BIT);
+ PROT_READ, GMAP_NOTIFY_SHADOW);
up_read(&parent->mm->mmap_sem);
spin_lock(&parent->shadow_lock);
new->initialized = true;
@@ -2092,6 +2253,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
}
EXPORT_SYMBOL_GPL(ptep_notify);
+static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
+}
+
+/**
+ * gmap_pmdp_xchg - exchange a gmap pmd with another
+ * @gmap: pointer to the guest address space structure
+ * @pmdp: pointer to the pmd entry
+ * @new: replacement entry
+ * @gaddr: the affected guest address
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
+ unsigned long gaddr)
+{
+ gaddr &= HPAGE_MASK;
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
+ IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+ *pmdp = new;
+}
+
+static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
+ int purge)
+{
+ pmd_t *pmdp;
+ struct gmap *gmap;
+ unsigned long gaddr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (pmdp) {
+ gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (purge)
+ __pmdp_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
+ * flushing
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
+{
+ gmap_pmdp_clear(mm, vmaddr, 0);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
+
+/**
+ * gmap_pmdp_csp - csp all affected guest pmd entries
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
+{
+ gmap_pmdp_clear(mm, vmaddr, 1);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
+
+/**
+ * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *entry, gaddr;
+ struct gmap *gmap;
+ pmd_t *pmdp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (entry) {
+ pmdp = (pmd_t *)entry;
+ gaddr = __gmap_segment_gaddr(entry);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+ gmap->asce, IDTE_LOCAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
+
+/**
+ * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *entry, gaddr;
+ struct gmap *gmap;
+ pmd_t *pmdp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (entry) {
+ pmdp = (pmd_t *)entry;
+ gaddr = __gmap_segment_gaddr(entry);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+ gmap->asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
+
+/**
+ * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
+ * @gmap: pointer to guest address space
+ * @pmdp: pointer to the pmd to be tested
+ * @gaddr: virtual address in the guest address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return false;
+
+ /* Already protected memory, which did not change is clean */
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
+ return false;
+
+ /* Clear UC indication and reset protection */
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
+ gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
+ return true;
+}
+
+/**
+ * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
+ * @gmap: pointer to guest address space
+ * @bitmap: dirty bitmap for this pmd
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: virtual address in the host address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr)
+{
+ int i;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return;
+
+ if (pmd_large(*pmdp)) {
+ if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
+ bitmap_fill(bitmap, _PAGE_ENTRIES);
+ } else {
+ for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
+ ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
+ if (!ptep)
+ continue;
+ if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
+ set_bit(i, bitmap);
+ spin_unlock(ptl);
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
+}
+EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+
static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -2168,17 +2548,45 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
*/
-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
+static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
/* Clear storage key */
ptep_zap_key(walk->mm, addr, pte);
return 0;
}
+static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
+ unsigned long hmask, unsigned long next,
+ struct mm_walk *walk)
+{
+ pmd_t *pmd = (pmd_t *)pte;
+ unsigned long start, end;
+ struct page *page = pmd_page(*pmd);
+
+ /*
+ * The write check makes sure we do not set a key on shared
+ * memory. This is needed as the walker does not differentiate
+ * between actual guest memory and the process executable or
+ * shared libraries.
+ */
+ if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
+ return 0;
+
+ start = pmd_val(*pmd) & HPAGE_MASK;
+ end = start + HPAGE_SIZE - 1;
+ __storage_key_init_range(start, end);
+ set_bit(PG_arch_1, &page->flags);
+ return 0;
+}
+
int s390_enable_skey(void)
{
- struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+ struct mm_walk walk = {
+ .hugetlb_entry = __s390_enable_skey_hugetlb,
+ .pte_entry = __s390_enable_skey_pte,
+ };
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int rc = 0;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index e804090f4470..b0246c705a19 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste)
return pte;
}
+static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
+{
+ struct page *page;
+ unsigned long size, paddr;
+
+ if (!mm_uses_skeys(mm) ||
+ rste & _SEGMENT_ENTRY_INVALID)
+ return;
+
+ if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+ page = pud_page(__pud(rste));
+ size = PUD_SIZE;
+ paddr = rste & PUD_MASK;
+ } else {
+ page = pmd_page(__pmd(rste));
+ size = PMD_SIZE;
+ paddr = rste & PMD_MASK;
+ }
+
+ if (!test_and_set_bit(PG_arch_1, &page->flags))
+ __storage_key_init_range(paddr, paddr + size - 1);
+}
+
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
@@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
else
rste |= _SEGMENT_ENTRY_LARGE;
+ clear_huge_pte_skeys(mm, rste);
pte_val(*ptep) = rste;
}
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 382153ff17e3..dc3cede7f2ec 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable)
list_for_each(l, &zone->free_area[order].free_list[t]) {
page = list_entry(l, struct page, lru);
if (make_stable)
- set_page_stable_dat(page, 0);
+ set_page_stable_dat(page, order);
else
set_page_unused(page, order);
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index c44171588d08..f8c6faab41f4 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -14,7 +14,7 @@
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
- asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
+ asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
: [addr] "+a" (addr) : [skey] "d" (skey));
return addr;
}
@@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
{
unsigned long boundary, size;
- if (!PAGE_DEFAULT_KEY)
- return;
while (start < end) {
if (MACHINE_HAS_EDAT1) {
/* set storage keys for a 1MB frame */
@@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
continue;
}
}
- page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
+ page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
start += PAGE_SIZE;
}
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 84bd6329a88d..76d89ee8b428 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -28,7 +28,7 @@ static struct ctl_table page_table_sysctl[] = {
.data = &page_table_allocate_pgste,
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &page_table_allocate_pgste_min,
.extra2 = &page_table_allocate_pgste_max,
},
@@ -252,6 +252,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
spin_unlock_bh(&mm->context.lock);
if (mask != 0)
return;
+ } else {
+ atomic_xor_bits(&page->_refcount, 3U << 24);
}
pgtable_page_dtor(page);
@@ -304,6 +306,8 @@ static void __tlb_remove_table(void *_table)
break;
/* fallthrough */
case 3: /* 4K page table with pgstes */
+ if (mask & 3)
+ atomic_xor_bits(&page->_refcount, 3 << 24);
pgtable_page_dtor(page);
__free_page(page);
break;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 301e466e4263..f2cc7da473e4 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
mm->context.asce, IDTE_LOCAL);
else
__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_local(mm, addr);
}
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (MACHINE_HAS_TLB_GUEST) {
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_global(mm, addr);
+ } else if (MACHINE_HAS_IDTE) {
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
- else
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_global(mm, addr);
+ } else {
__pmdp_csp(pmdp);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_csp(mm, addr);
+ }
}
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
@@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
+ if (mm_has_pgste(mm))
+ gmap_pmdp_invalidate(mm, addr);
} else {
pmdp_idte_global(mm, addr, pmdp);
}
@@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
return old;
}
+static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return NULL;
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return NULL;
+ pmd = pmd_alloc(mm, pud, addr);
+ return pmd;
+}
+
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t new)
{
@@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/*
* Test and reset if a guest page is dirty
*/
-bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- spinlock_t *ptl;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
pgste_t pgste;
- pte_t *ptep;
pte_t pte;
bool dirty;
int nodat;
- pgd = pgd_offset(mm, addr);
- p4d = p4d_alloc(mm, pgd, addr);
- if (!p4d)
- return false;
- pud = pud_alloc(mm, p4d, addr);
- if (!pud)
- return false;
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return false;
- /* We can't run guests backed by huge pages, but userspace can
- * still set them up and then try to migrate them without any
- * migration support.
- */
- if (pmd_large(*pmd))
- return true;
-
- ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (unlikely(!ptep))
- return false;
-
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
@@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
*ptep = pte;
}
pgste_set_unlock(ptep, pgste);
-
- spin_unlock(ptl);
return dirty;
}
-EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
+EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char key, bool nq)
{
- unsigned long keyul;
+ unsigned long keyul, paddr;
spinlock_t *ptl;
pgste_t old, new;
+ pmd_t *pmdp;
pte_t *ptep;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ spin_unlock(ptl);
+ return -EFAULT;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ /*
+ * Huge pmds need quiescing operations, they are
+ * always mapped.
+ */
+ page_set_storage_key(paddr, key, 1);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
@@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
- unsigned long address, bits, skey;
+ unsigned long bits, skey;
- address = pte_val(*ptep) & PAGE_MASK;
- skey = (unsigned long) page_get_storage_key(address);
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(paddr);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set storage key ACC and FP */
- page_set_storage_key(address, skey, !nq);
+ page_set_storage_key(paddr, skey, !nq);
/* Merge host changed & referenced into pgste */
pgste_val(new) |= bits << 52;
}
@@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
+ unsigned long paddr;
pgste_t old, new;
+ pmd_t *pmdp;
pte_t *ptep;
int cc = 0;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ spin_unlock(ptl);
+ return -EFAULT;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ cc = page_reset_referenced(paddr);
+ spin_unlock(ptl);
+ return cc;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
@@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
pgste_val(new) &= ~PGSTE_GR_BIT;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
- cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ cc = page_reset_referenced(paddr);
/* Merge real referenced bit into host-set */
pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
}
@@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char *key)
{
+ unsigned long paddr;
spinlock_t *ptl;
pgste_t pgste;
+ pmd_t *pmdp;
pte_t *ptep;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ /* Not yet mapped memory has a zero key */
+ spin_unlock(ptl);
+ *key = 0;
+ return 0;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ *key = page_get_storage_key(paddr);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
pgste = pgste_get_lock(ptep);
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ paddr = pte_val(*ptep) & PAGE_MASK;
if (!(pte_val(*ptep) & _PAGE_INVALID))
- *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+ *key = page_get_storage_key(paddr);
/* Reflect guest's logical view, not physical */
*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
pgste_set_unlock(ptep, pgste);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index d2db8acb1a55..d7052cbe984f 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -485,8 +485,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
/* br %r1 */
_EMIT2(0x07f1);
} else {
- /* larl %r1,.+14 */
- EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
/* ex 0,S390_lowcore.br_r1_tampoline */
EMIT4_DISP(0x44000000, REG_0, REG_0,
offsetof(struct lowcore, br_r1_trampoline));
@@ -1286,6 +1284,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
goto free_addrs;
}
if (bpf_jit_prog(&jit, fp)) {
+ bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
}
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 06a80434cfe6..5bd374491f94 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -134,6 +134,8 @@ void __init numa_setup(void)
{
pr_info("NUMA mode: %s\n", mode->name);
nodes_clear(node_possible_map);
+ /* Initially attach all possible CPUs to node 0. */
+ cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
if (mode->setup)
mode->setup();
numa_setup_memory();
@@ -141,20 +143,6 @@ void __init numa_setup(void)
}
/*
- * numa_init_early() - Initialization initcall
- *
- * This runs when only one CPU is online and before the first
- * topology update is called for by the scheduler.
- */
-static int __init numa_init_early(void)
-{
- /* Attach all possible CPUs to node 0 for now. */
- cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
- return 0;
-}
-early_initcall(numa_init_early);
-
-/*
* numa_init_late() - Initialization initcall
*
* Register NUMA nodes.
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 4902fed221c0..9f6f392a4461 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -420,7 +420,8 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
/* Request MSI interrupts */
hwirq = 0;
for_each_pci_msi_entry(msi, pdev) {
- rc = -EIO;
+ if (hwirq >= msi_vecs)
+ break;
irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
if (irq < 0)
return -ENOMEM;
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index b482e95b6249..04388a254ffb 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -48,6 +48,10 @@ static char *pci_fmt2_names[] = {
"Maximum work units",
};
+static char *pci_fmt3_names[] = {
+ "Transmitted bytes",
+};
+
static char *pci_sw_names[] = {
"Allocated pages",
"Mapped pages",
@@ -89,7 +93,6 @@ static int pci_perf_show(struct seq_file *m, void *v)
}
/* header */
- seq_printf(m, "FMB @ %p\n", zdev->fmb);
seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update);
seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
@@ -112,6 +115,10 @@ static int pci_perf_show(struct seq_file *m, void *v)
pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names),
&zdev->fmb->fmt2.consumed_work_units);
break;
+ case 3:
+ pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names),
+ &zdev->fmb->fmt3.tx_bytes);
+ break;
default:
seq_puts(m, "Unknown format\n");
}
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 1ace023cbdce..ce6a3f75065b 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -7,13 +7,13 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o
targets += $(purgatory-y) purgatory.ro kexec-purgatory.c
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
-$(obj)/sha256.o: $(srctree)/lib/sha256.c
+$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S
+$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-$(obj)/string.o: $(srctree)/arch/s390/lib/string.c
+$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE
$(call if_changed_rule,cc_o_c)
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib
@@ -21,15 +21,15 @@ LDFLAGS_purgatory.ro += -z nodefaultlib
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
-KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float
+KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
-CMD_BIN2C = $(objtree)/scripts/basic/bin2c
quiet_cmd_bin2c = BIN2C $@
- cmd_bin2c = $(CMD_BIN2C) kexec_purgatory < $< > $@
+ cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
$(call if_changed,bin2c)
diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S
index 660c96a05a9b..2e3707b12edd 100644
--- a/arch/s390/purgatory/head.S
+++ b/arch/s390/purgatory/head.S
@@ -243,33 +243,26 @@ gprregs:
.quad 0
.endr
-purgatory_sha256_digest:
- .global purgatory_sha256_digest
- .rept 32 /* SHA256_DIGEST_SIZE */
- .byte 0
- .endr
-
-purgatory_sha_regions:
- .global purgatory_sha_regions
- .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */
- .byte 0
- .endr
-
-kernel_entry:
- .global kernel_entry
- .quad 0
-
-kernel_type:
- .global kernel_type
- .quad 0
-
-crash_start:
- .global crash_start
- .quad 0
+/* Macro to define a global variable with name and size (in bytes) to be
+ * shared with C code.
+ *
+ * Add the .size and .type attribute to satisfy checks on the Elf_Sym during
+ * purgatory load.
+ */
+.macro GLOBAL_VARIABLE name,size
+\name:
+ .global \name
+ .size \name,\size
+ .type \name,object
+ .skip \size,0
+.endm
-crash_size:
- .global crash_size
- .quad 0
+GLOBAL_VARIABLE purgatory_sha256_digest,32
+GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE
+GLOBAL_VARIABLE kernel_entry,8
+GLOBAL_VARIABLE kernel_type,8
+GLOBAL_VARIABLE crash_start,8
+GLOBAL_VARIABLE crash_size,8
.align PAGE_SIZE
stack:
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 4e2beb3c29b7..3528e6da4e87 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -12,15 +12,6 @@
#include <linux/string.h>
#include <asm/purgatory.h>
-struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
-u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
-
-u64 kernel_entry;
-u64 kernel_type;
-
-u64 crash_start;
-u64 crash_size;
-
int verify_sha256_digest(void)
{
struct kexec_sha_region *ptr, *end;
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
index d92f2d94a5d9..9bba2c14e0ca 100644
--- a/arch/s390/scripts/Makefile.chkbss
+++ b/arch/s390/scripts/Makefile.chkbss
@@ -2,13 +2,22 @@
quiet_cmd_chkbss = CHKBSS $<
define cmd_chkbss
+ rm -f $@; \
if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
echo "error: $< .bss section is not empty" >&2; exit 1; \
fi; \
touch $@;
endef
-$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss))
+chkbss-target ?= $(obj)/built-in.a
+ifneq (,$(findstring /,$(chkbss)))
+chkbss-files := $(patsubst %, %.chkbss, $(chkbss))
+else
+chkbss-files := $(patsubst %, $(obj)/%.chkbss, $(chkbss))
+endif
+
+$(chkbss-target): $(chkbss-files)
+targets += $(notdir $(chkbss-files))
%.o.chkbss: %.o
$(call cmd,chkbss)
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 90a8c9e84ca6..0c85aedcf9b3 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -4,7 +4,7 @@
* numbering scheme from the Princples of Operations: most significant bit
* has bit number 0.
*
- * Copyright IBM Corp. 2015
+ * Copyright IBM Corp. 2015, 2018
*
*/
@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = {
.name = "FACILITIES_KVM_CPUMODEL",
.bits = (int[]){
+ 156, /* etoken facility */
-1 /* END */
}
},
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index 259aa0680d1a..a1bc02b29c81 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -257,7 +257,7 @@ static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset)
if (!desc->group)
exit(EXIT_FAILURE);
group = &desc->group[desc->nr_groups - 1];
- strncpy(group->opcode, insn->opcode, 2);
+ memcpy(group->opcode, insn->opcode, 2);
group->type = insn->type;
group->offset = offset;
group->count = 1;
@@ -283,7 +283,7 @@ static void print_opcode_table(struct gen_opcode *desc)
continue;
add_to_group(desc, insn, offset);
if (strncmp(opcode, insn->opcode, 2)) {
- strncpy(opcode, insn->opcode, 2);
+ memcpy(opcode, insn->opcode, 2);
printf("\t/* %.2s */ \\\n", opcode);
}
print_opcode(insn, offset);