From 64e03ff72623b8c2ea89ca3cb660094e019ed4ae Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 16 May 2018 09:37:25 +0200 Subject: s390/qdio: reset old sbal_state flags When allocating a new AOB fails, handle_outbound() is still capable of transmitting the selected buffer (just without async completion). But if a previous transfer on this queue slot used async completion, its sbal_state flags field is still set to QDIO_OUTBUF_STATE_FLAG_PENDING. So when the upper layer driver sees this stale flag, it expects an async completion that never happens. Fix this by unconditionally clearing the flags field. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Cc: #v3.2+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index de11ecc99c7c..9c9970a5dfb1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -262,7 +262,6 @@ struct qdio_outbuf_state { void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 #define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 #define CHSC_AC1_INITIATE_INPUTQ 0x80 -- cgit v1.2.3-59-g8ed1b From 157484abb9b05e3f49cd3903202ffad35cea4350 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 15 Jun 2018 10:22:18 +0200 Subject: s390: disable asm code expolines if cc does not support it To avoid a mixture of asm code with expolines and c code without them, propagate CC_USING_EXPOLINE to KBUILD_AFLAGS and use it to detect whether asm code should have expolines or not. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 1 + arch/s390/include/asm/nospec-insn.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 68a690442be0..8a41110d3a9a 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -82,6 +82,7 @@ ifdef CONFIG_EXPOLINE CC_FLAGS_EXPOLINE += -mindirect-branch-table export CC_FLAGS_EXPOLINE cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif endif diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index a01f81186e86..123dac3717b3 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_EXPOLINE +#ifdef CC_USING_EXPOLINE _LC_BR_R1 = __LC_BR_R1 @@ -189,7 +189,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro BASR_EX rsave,rtarget,ruse=%r1 basr \rsave,\rtarget .endm -#endif +#endif /* CC_USING_EXPOLINE */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3-59-g8ed1b From 19f73e16199ee21b95e15bb5c380ad69f12e8c5b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 15 Jun 2018 10:48:54 +0200 Subject: s390/build: remove obsolete -mkernel-backchain flag -mkernel-backchain cc flag is obsolete since quite a while, and not present in minimal (supported by s390) gcc version 4.3. Removing it. 
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 8a41110d3a9a..cdf3557864e1 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -52,13 +52,6 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -# old style option for packed stacks -ifeq ($(call cc-option-yn,-mkernel-backchain),y) -cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - -# new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -- cgit v1.2.3-59-g8ed1b From 76bf9d6cef5788c769af987ff6f8ece40c6ab152 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 15 Jun 2018 12:28:05 +0200 Subject: s390/decompressor: correct build flags The decompressor requires its own set of cc and asm flags, to avoid building with features which do not make sense at such an early boot stage (e.g. expoline, ftrace). Currently cc flags are already set for the decompressor, but "cflags-y" is not exported and hence empty. To fix that and to add asm flags, define and export KBUILD_AFLAGS_DECOMPRESSOR and KBUILD_CFLAGS_DECOMPRESSOR and rely on them in the decompressor's Makefile. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 20 +++++++++++++++++--- arch/s390/boot/compressed/Makefile | 13 ++++--------- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index cdf3557864e1..d7ec2baa2f75 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,8 +14,14 @@ LD_BFD := elf64-s390 LDFLAGS := -m elf64_s390 KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC -KBUILD_CFLAGS += -m64 KBUILD_AFLAGS += -m64 +KBUILD_CFLAGS += -m64 +KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ +KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) UTS_MACHINE := s390x STACK_SIZE := 16384 CHECKFLAGS += -D__s390__ -D__s390x__ @@ -57,6 +63,9 @@ cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif +KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) +KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) + ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) ifneq ($(call cc-option-yn,-mstack-size=8192),y) @@ -64,8 +73,11 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif -ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack +ifdef CONFIG_WARN_DYNAMIC_STACK + ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) + KBUILD_CFLAGS += -mwarn-dynamicstack + KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack + endif endif ifdef CONFIG_EXPOLINE @@ -96,6 +108,8 @@ KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) 
KBUILD_AFLAGS += $(aflags-y) $(cfi) +export KBUILD_AFLAGS_DECOMPRESSOR +export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 5766f7b9b271..4f81d14c9f35 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,20 +6,15 @@ # KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o -KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2 -KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float -KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) -KBUILD_CFLAGS += $(call cc-option,-ffreestanding) - -GCOV_PROFILE := n -UBSAN_SANITIZE := n +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o ebcdic.o als.o) OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o -- cgit v1.2.3-59-g8ed1b From 00f2fb573fdc3c21f3b7266a4a8758a85943edfb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 11 Jun 2018 14:35:01 +0200 Subject: s390: remove uncompressed kernel image build Dropping support for uncompressed kernel "image" build. Having both image and bzImage makes it complicated to add new code to an early boot phase (which is part of vmlinux for uncompressed kernel and a separate arch/s390/boot/compressed/vmlinux for bzImage). e.g. sclp_early_core.o is used for both, the decompressor phase and the main kernel. The fact of having uncompressed kernel "image" forces us to have a single object file and sacrifice instrumentation flags on such files (so that we could use them early). The story gets much more complicated with the need to utilize some of the string functions. With bzImage only support, we have 2 separate boot stages each built and linked separately, which allows to reuse some shared code, but recompile with appropriate flags. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 7 +++---- arch/s390/boot/Makefile | 6 +----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index d7ec2baa2f75..ea3e610ead75 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -129,7 +129,7 @@ boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls tools := arch/s390/tools -all: image bzImage +all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... 
KBUILD_IMAGE := $(boot)/bzImage @@ -137,7 +137,7 @@ KBUILD_IMAGE := $(boot)/bzImage install: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -image bzImage: vmlinux +bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zfcpdump: @@ -160,8 +160,7 @@ archprepare: # Don't use tabs in echo arguments define archhelp - echo '* image - Kernel image for IPL ($(boot)/image)' - echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' + echo '* bzImage - Kernel image for IPL ($(boot)/bzImage)' echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index d1fa37fcce83..da5f299dcd18 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,13 +3,9 @@ # Makefile for the linux s390-specific parts of the memory manager. # -targets := image -targets += bzImage +targets := bzImage subdir- := compressed -$(obj)/image: vmlinux FORCE - $(call if_changed,objcopy) - $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) -- cgit v1.2.3-59-g8ed1b From bd79d66329580d6c3fd9556423d2e5124906cfdd Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 15:11:40 +0200 Subject: s390/decompressor: trim the kernel image up to 1M Move head64.S main kernel entry point "startup_continue" to 0x100000 and trim everything which is below 1M during build. So, that the decompressor would unpack the main kernel image, move it to 0x100000 and jump to startup_continue. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 2 +- arch/s390/boot/compressed/head.S | 6 +++--- arch/s390/kernel/head64.S | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 4f81d14c9f35..9b48e7b86fc8 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -24,7 +24,7 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(call if_changed,ld) -TRIM_HEAD_SIZE := 0x11000 +TRIM_HEAD_SIZE := 0x100000 sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p' diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index 9f94eca0f467..1c69907dba28 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -23,7 +23,7 @@ ENTRY(startup_continue) aghi %r15,-160 brasl %r14,decompress_kernel # Set up registers for memory mover. We move the decompressed image to - # 0x11000, where startup_continue of the decompressed image is supposed + # 0x100000, where startup_continue of the decompressed image is supposed # to be. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) @@ -33,7 +33,7 @@ ENTRY(startup_continue) la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) # When the memory mover is done we pass control to - # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in # the decompressed image. 
lgr %r6,%r2 br %r1 @@ -47,6 +47,6 @@ mover_end: .Lstack: .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) .Loffset: - .quad 0x11000 + .quad 0x100000 .Lmvsize: .quad SZ__bss_start diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 791cb9000e86..53e5cb0d6f7f 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,6 +16,7 @@ #include __HEAD + .org 0x100000 - 0x11000 # head.o ends at 0x11000 ENTRY(startup_continue) tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? jz 0f @@ -88,7 +89,6 @@ ENTRY(startup_continue) ENTRY(_ehead) - .org 0x100000 - 0x11000 # head.o ends at 0x11000 # # startup-code, running in absolute addressing mode # -- cgit v1.2.3-59-g8ed1b From 8282cd64d0f9a5292e9f8408a96733523e7f1fec Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 15:11:40 +0200 Subject: s390/boot: make head.S and als.c be part of the decompressor only Since uncompressed kernel image does not have to be bootable anymore, move head.S, head_kdump.S and als.c to boot/ folder and compile them in just in the decompressor. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 3 +- arch/s390/boot/Makefile | 33 +++- arch/s390/boot/als.c | 127 ++++++++++++++ arch/s390/boot/compressed/Makefile | 4 +- arch/s390/boot/head.S | 339 +++++++++++++++++++++++++++++++++++++ arch/s390/boot/head_kdump.S | 101 +++++++++++ arch/s390/kernel/Makefile | 24 +-- arch/s390/kernel/als.c | 128 -------------- arch/s390/kernel/entry.h | 1 - arch/s390/kernel/head.S | 339 ------------------------------------- arch/s390/kernel/head64.S | 2 +- arch/s390/kernel/head_kdump.S | 101 ----------- arch/s390/kernel/vmlinux.lds.S | 2 +- 13 files changed, 605 insertions(+), 599 deletions(-) create mode 100644 arch/s390/boot/als.c create mode 100644 arch/s390/boot/head.S create mode 100644 arch/s390/boot/head_kdump.S delete mode 100644 arch/s390/kernel/als.c delete mode 100644 arch/s390/kernel/head.S delete mode 100644 arch/s390/kernel/head_kdump.S (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ea3e610ead75..d25fbc170164 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -113,8 +113,7 @@ export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary -head-y := arch/s390/kernel/head.o -head-y += arch/s390/kernel/head64.o +head-y := arch/s390/kernel/head64.o # See arch/s390/Kbuild for content of core part of the kernel core-y += arch/s390/ diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index da5f299dcd18..05e92d43d383 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,15 +3,42 @@ # Makefile for the linux s390-specific parts of the memory manager. 
# -targets := bzImage -subdir- := compressed +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n + +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) + +# +# Use -march=z900 for als.c to be able to print an error +# message if the kernel is started on a machine which is too old +# +ifneq ($(CC_FLAGS_MARCH),-march=z900) +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += -march=z900 +endif + +obj-y := head.o als.o +targets := bzImage startup.a $(obj-y) +subdir- := compressed + +OBJECTS := $(addprefix $(obj)/,$(obj-y)) $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) -$(obj)/compressed/vmlinux: FORCE +$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ +quiet_cmd_ar = AR $@ + cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter-out FORCE, $^) + +$(obj)/startup.a: $(OBJECTS) FORCE + $(call if_changed,ar) + install: $(CONFIGURE) $(obj)/bzImage sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c new file mode 100644 index 000000000000..b6287a99a7b8 --- /dev/null +++ b/arch/s390/boot/als.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2016 + */ +#include +#include +#include +#include +#include +#include + +/* + * The code within this file will be called very early. It may _not_ + * access anything within the bss section, since that is not cleared + * yet and may contain data (e.g. initrd) that must be saved by other + * code. + * For temporary objects the stack (16k) should be used. + */ + +static unsigned long als[] __initdata = { FACILITIES_ALS }; + +static void __init u16_to_hex(char *str, u16 val) +{ + int i, num; + + for (i = 1; i <= 4; i++) { + num = (val >> (16 - 4 * i)) & 0xf; + if (num >= 10) + num += 7; + *str++ = '0' + num; + } + *str = '\0'; +} + +static void __init print_machine_type(void) +{ + static char mach_str[80] __initdata = "Detected machine-type number: "; + char type_str[5]; + struct cpuid id; + + get_cpu_id(&id); + u16_to_hex(type_str, id.machine); + strcat(mach_str, type_str); + strcat(mach_str, "\n"); + sclp_early_printk(mach_str); +} + +static void __init u16_to_decimal(char *str, u16 val) +{ + int div = 1; + + while (div * 10 <= val) + div *= 10; + while (div) { + *str++ = '0' + val / div; + val %= div; + div /= 10; + } + *str = '\0'; +} + +static void __init print_missing_facilities(void) +{ + static char als_str[80] __initdata = "Missing facilities: "; + unsigned long val; + char val_str[6]; + int i, j, first; + + first = 1; + for (i = 0; i < ARRAY_SIZE(als); i++) { + val = ~S390_lowcore.stfle_fac_list[i] & als[i]; + for (j = 0; j < BITS_PER_LONG; j++) { + if (!(val & (1UL << (BITS_PER_LONG - 1 - j)))) + continue; + if (!first) + strcat(als_str, ","); + /* + * Make sure we stay within one line. Consider that + * each facility bit adds up to five characters and + * z/VM adds a four character prefix. 
+ */ + if (strlen(als_str) > 70) { + strcat(als_str, "\n"); + sclp_early_printk(als_str); + *als_str = '\0'; + } + u16_to_decimal(val_str, i * BITS_PER_LONG + j); + strcat(als_str, val_str); + first = 0; + } + } + strcat(als_str, "\n"); + sclp_early_printk(als_str); + sclp_early_printk("See Principles of Operations for facility bits\n"); +} + +static void __init facility_mismatch(void) +{ + sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); + print_machine_type(); + print_missing_facilities(); + disabled_wait(0x8badcccc); +} + +void __init verify_facilities(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(S390_lowcore.stfle_fac_list); i++) + S390_lowcore.stfle_fac_list[i] = 0; + asm volatile( + " stfl 0(0)\n" + : "=m" (S390_lowcore.stfl_fac_list)); + S390_lowcore.stfle_fac_list[0] = (u64)S390_lowcore.stfl_fac_list << 32; + if (S390_lowcore.stfl_fac_list & 0x01000000) { + register unsigned long reg0 asm("0") = ARRAY_SIZE(als) - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (&S390_lowcore.stfle_fac_list) + : "memory", "cc"); + } + for (i = 0; i < ARRAY_SIZE(als); i++) { + if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) + facility_mismatch(); + } +} diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 9b48e7b86fc8..855f7fdb1f61 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -16,12 +16,12 @@ targets += misc.o piggy.o sizes.h head.o KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o ebcdic.o als.o) +OBJECTS := $(objtree)/arch/s390/kernel/ebcdic.o OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) $(call if_changed,ld) TRIM_HEAD_SIZE := 0x100000 diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S new file mode 100644 index 000000000000..5c42f16a54c4 --- /dev/null +++ b/arch/s390/boot/head.S @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2010 + * + * Author(s): Hartmut Penner + * Martin Schwidefsky + * Rob van der Heij + * Heiko Carstens + * + * There are 5 different IPL methods + * 1) load the image directly into ram at address 0 and do an PSW restart + * 2) linload will load the image from address 0x10000 to memory 0x10000 + * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated) + * 3) generate the tape ipl header, store the generated image on a tape + * and ipl from it + * In case of SL tape you need to IPL 5 times to get past VOL1 etc + * 4) generate the vm reader ipl header, move the generated image to the + * VM reader (use option NOH!) and do a ipl from reader (VM only) + * 5) direct call of start by the SALIPL loader + * We use the cpuid to distinguish between VM and native ipl + * params for kernel are pushed to 0x10400 (see setup.h) + * + */ + +#include +#include +#include +#include +#include +#include + +#define ARCH_OFFSET 4 + +__HEAD + +#define IPL_BS 0x730 + .org 0 + .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded + .long 0x02000018,0x60000050 # by ipl to addresses 0-23. + .long 0x02000068,0x60000050 # (a PSW and two CCWs). + .fill 80-24,1,0x40 # bytes 24-79 are discarded !! 
+ .long 0x020000f0,0x60000050 # The next 160 byte are loaded + .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 + .long 0x02000190,0x60000050 # They form the continuation + .long 0x020001e0,0x60000050 # of the CCW program started + .long 0x02000230,0x60000050 # by ipl and load the range + .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image + .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 + .long 0x02000320,0x60000050 # in memory. At the end of + .long 0x02000370,0x60000050 # the channel program the PSW + .long 0x020003c0,0x60000050 # at location 0 is loaded. + .long 0x02000410,0x60000050 # Initial processing starts + .long 0x02000460,0x60000050 # at 0x200 = iplstart. + .long 0x020004b0,0x60000050 + .long 0x02000500,0x60000050 + .long 0x02000550,0x60000050 + .long 0x020005a0,0x60000050 + .long 0x020005f0,0x60000050 + .long 0x02000640,0x60000050 + .long 0x02000690,0x60000050 + .long 0x020006e0,0x20000050 + + .org 0x200 + +# +# subroutine to wait for end I/O +# +.Lirqwait: + mvc __LC_IO_NEW_PSW(16),.Lnewpsw # set up IO interrupt psw + lpsw .Lwaitpsw +.Lioint: + br %r14 + .align 8 +.Lnewpsw: + .quad 0x0000000080000000,.Lioint +.Lwaitpsw: + .long 0x020a0000,0x80000000+.Lioint + +# +# subroutine for loading cards from the reader +# +.Lloader: + la %r4,0(%r14) + la %r3,.Lorb # r2 = address of orb into r2 + la %r5,.Lirb # r4 = address of irb + la %r6,.Lccws + la %r7,20 +.Linit: + st %r2,4(%r6) # initialize CCW data addresses + la %r2,0x50(%r2) + la %r6,8(%r6) + bct 7,.Linit + + lctl %c6,%c6,.Lcr6 # set IO subclass mask + slr %r2,%r2 +.Lldlp: + ssch 0(%r3) # load chunk of 1600 bytes + bnz .Llderr +.Lwait4irq: + bas %r14,.Lirqwait + c %r1,__LC_SUBCHANNEL_ID # compare subchannel number + bne .Lwait4irq + tsch 0(%r5) + + slr %r0,%r0 + ic %r0,8(%r5) # get device status + chi %r0,8 # channel end ? + be .Lcont + chi %r0,12 # channel end + device end ? + be .Lcont + + l %r0,4(%r5) + s %r0,8(%r3) # r0/8 = number of ccws executed + mhi %r0,10 # *10 = number of bytes in ccws + lh %r3,10(%r5) # get residual count + sr %r0,%r3 # #ccws*80-residual=#bytes read + ar %r2,%r0 + + br %r4 # r2 contains the total size + +.Lcont: + ahi %r2,0x640 # add 0x640 to total size + la %r6,.Lccws + la %r7,20 +.Lincr: + l %r0,4(%r6) # update CCW data addresses + ahi %r0,0x640 + st %r0,4(%r6) + ahi %r6,8 + bct 7,.Lincr + + b .Lldlp +.Llderr: + lpsw .Lcrash + + .align 8 +.Lorb: .long 0x00000000,0x0080ff00,.Lccws +.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lcr6: .long 0xff000000 +.Lloadp:.long 0,0 + .align 8 +.Lcrash:.long 0x000a0000,0x00000000 + + .align 8 +.Lccws: .rept 19 + .long 0x02600050,0x00000000 + .endr + .long 0x02200050,0x00000000 + +iplstart: + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam31 # switch to 31 bit addressing mode + lh %r1,__LC_SUBCHANNEL_ID # test if subchannel number + bct %r1,.Lnoload # is valid + l %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number + la %r2,IPL_BS # load start address + bas %r14,.Lloader # load rest of ipl image + l %r12,.Lparm # pointer to parameter area + st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number + +# +# load parameter file from ipl device +# +.Lagain1: + l %r2,.Linitrd # ramdisk loc. is temp + bas %r14,.Lloader # load parameter file + ltr %r2,%r2 # got anything ? 
+ bz .Lnopf + chi %r2,895 + bnh .Lnotrunc + la %r2,895 +.Lnotrunc: + l %r4,.Linitrd + clc 0(3,%r4),.L_hdr # if it is HDRx + bz .Lagain1 # skip dataset header + clc 0(3,%r4),.L_eof # if it is EOFx + bz .Lagain1 # skip dateset trailer + la %r5,0(%r4,%r2) + lr %r3,%r2 + la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line + mvc 0(256,%r3),0(%r4) + mvc 256(256,%r3),256(%r4) + mvc 512(256,%r3),512(%r4) + mvc 768(122,%r3),768(%r4) + slr %r0,%r0 + b .Lcntlp +.Ldelspc: + ic %r0,0(%r2,%r3) + chi %r0,0x20 # is it a space ? + be .Lcntlp + ahi %r2,1 + b .Leolp +.Lcntlp: + brct %r2,.Ldelspc +.Leolp: + slr %r0,%r0 + stc %r0,0(%r2,%r3) # terminate buffer +.Lnopf: + +# +# load ramdisk from ipl device +# +.Lagain2: + l %r2,.Linitrd # addr of ramdisk + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) + bas %r14,.Lloader # load ramdisk + st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd + ltr %r2,%r2 + bnz .Lrdcont + st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found +.Lrdcont: + l %r2,.Linitrd + + clc 0(3,%r2),.L_hdr # skip HDRx and EOFx + bz .Lagain2 + clc 0(3,%r2),.L_eof + bz .Lagain2 + +# +# reset files in VM reader +# + stidp .Lcpuid # store cpuid + tm .Lcpuid,0xff # running VM ? + bno .Lnoreset + la %r2,.Lreset + lhi %r3,26 + diag %r2,%r3,8 + la %r5,.Lirb + stsch 0(%r5) # check if irq is pending + tm 30(%r5),0x0f # by verifying if any of the + bnz .Lwaitforirq # activity or status control + tm 31(%r5),0xff # bits is set in the schib + bz .Lnoreset +.Lwaitforirq: + bas %r14,.Lirqwait # wait for IO interrupt + c %r1,__LC_SUBCHANNEL_ID # compare subchannel number + bne .Lwaitforirq + la %r5,.Lirb + tsch 0(%r5) +.Lnoreset: + b .Lnoload + +# +# everything loaded, go for it +# +.Lnoload: + l %r1,.Lstartup + br %r1 + +.Linitrd:.long _end # default address of initrd +.Lparm: .long PARMAREA +.Lstartup: .long startup +.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 + .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 + .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" +.L_eof: .long 0xc5d6c600 /* C'EOF' */ +.L_hdr: .long 0xc8c4d900 /* C'HDR' */ + .align 8 +.Lcpuid:.fill 8,1,0 + +# +# startup-code at 0x10000, running in absolute addressing mode +# this is called either by the ipl loader or directly by PSW restart +# or linload or SALIPL +# + .org 0x10000 +ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! 
+# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: + mvi __LC_AR_MODE_ID,1 # set esame flag + slr %r0,%r0 # set cpuid to zero + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # switch to esame mode + bras %r13,0f + .fill 16,4,0x0 +0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs + sam64 # switch to 64 bit addressing mode + basr %r13,0 # get base +.LPG0: + xc 0x200(256),0x200 # partially clear lowcore + xc 0x300(256),0x300 + xc 0xe00(256),0xe00 + xc 0xf00(256),0xf00 + lctlg %c0,%c15,0x200(%r0) # initialize control registers + stcke __LC_BOOT_CLOCK + mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 + spt 6f-.LPG0(%r13) + mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) + l %r15,.Lstack-.LPG0(%r13) + ahi %r15,-STACK_FRAME_OVERHEAD + brasl %r14,verify_facilities +# For uncompressed images, continue in +# arch/s390/kernel/head64.S. For compressed images, continue in +# arch/s390/boot/compressed/head.S. + jg startup_continue + +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) + .align 8 +6: .long 0x7fffffff,0xffffffff + +#include "head_kdump.S" + +# +# params at 10400 (setup.h) +# + .org PARMAREA + .long 0,0 # IPL_DEVICE + .long 0,0 # INITRD_START + .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE + + .org COMMAND_LINE + .byte "root=/dev/ram0 ro" + .byte 0 + + .org 0x11000 diff --git a/arch/s390/boot/head_kdump.S b/arch/s390/boot/head_kdump.S new file mode 100644 index 000000000000..174d6959bf5b --- /dev/null +++ b/arch/s390/boot/head_kdump.S @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 
2011 + * Author(s): Michael Holzheu + */ + +#include + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... +.align 8 +.Lrestart_psw: + .quad 0x0000000080000000,0x0000000000000000 + startup +#else +.align 2 +.Lep_startup_kdump: + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2fed39b26b42..dbfd1730e631 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -9,38 +9,20 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code -CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE) endif -GCOV_PROFILE_als.o := n GCOV_PROFILE_early.o := n GCOV_PROFILE_early_nobss.o := n -KCOV_INSTRUMENT_als.o := n KCOV_INSTRUMENT_early.o := n KCOV_INSTRUMENT_early_nobss.o := n -UBSAN_SANITIZE_als.o := n UBSAN_SANITIZE_early.o := n UBSAN_SANITIZE_early_nobss.o := n -# -# Use -march=z900 for als.c to be able to print an error -# message if the kernel is started on a machine which is too old -# -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE) -CFLAGS_als.o += -march=z900 -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += -march=z900 -endif - -CFLAGS_als.o += -D__NO_FORTIFY - # # Passing null pointers is ok for smp code, since we access the lowcore here. 
# @@ -61,13 +43,13 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o -extra-y += head.o head64.o vmlinux.lds +extra-y += head64.o vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) @@ -99,5 +81,5 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ -chkbss := head.o head64.o als.o early_nobss.o +chkbss := head64.o early_nobss.o include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/als.c b/arch/s390/kernel/als.c deleted file mode 100644 index d1892bf36cab..000000000000 --- a/arch/s390/kernel/als.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2016 - */ -#include -#include -#include -#include -#include -#include -#include "entry.h" - -/* - * The code within this file will be called very early. It may _not_ - * access anything within the bss section, since that is not cleared - * yet and may contain data (e.g. initrd) that must be saved by other - * code. - * For temporary objects the stack (16k) should be used. - */ - -static unsigned long als[] __initdata = { FACILITIES_ALS }; - -static void __init u16_to_hex(char *str, u16 val) -{ - int i, num; - - for (i = 1; i <= 4; i++) { - num = (val >> (16 - 4 * i)) & 0xf; - if (num >= 10) - num += 7; - *str++ = '0' + num; - } - *str = '\0'; -} - -static void __init print_machine_type(void) -{ - static char mach_str[80] __initdata = "Detected machine-type number: "; - char type_str[5]; - struct cpuid id; - - get_cpu_id(&id); - u16_to_hex(type_str, id.machine); - strcat(mach_str, type_str); - strcat(mach_str, "\n"); - sclp_early_printk(mach_str); -} - -static void __init u16_to_decimal(char *str, u16 val) -{ - int div = 1; - - while (div * 10 <= val) - div *= 10; - while (div) { - *str++ = '0' + val / div; - val %= div; - div /= 10; - } - *str = '\0'; -} - -static void __init print_missing_facilities(void) -{ - static char als_str[80] __initdata = "Missing facilities: "; - unsigned long val; - char val_str[6]; - int i, j, first; - - first = 1; - for (i = 0; i < ARRAY_SIZE(als); i++) { - val = ~S390_lowcore.stfle_fac_list[i] & als[i]; - for (j = 0; j < BITS_PER_LONG; j++) { - if (!(val & (1UL << (BITS_PER_LONG - 1 - j)))) - continue; - if (!first) - strcat(als_str, ","); - /* - * Make sure we stay within one line. Consider that - * each facility bit adds up to five characters and - * z/VM adds a four character prefix. 
- */ - if (strlen(als_str) > 70) { - strcat(als_str, "\n"); - sclp_early_printk(als_str); - *als_str = '\0'; - } - u16_to_decimal(val_str, i * BITS_PER_LONG + j); - strcat(als_str, val_str); - first = 0; - } - } - strcat(als_str, "\n"); - sclp_early_printk(als_str); - sclp_early_printk("See Principles of Operations for facility bits\n"); -} - -static void __init facility_mismatch(void) -{ - sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); - print_machine_type(); - print_missing_facilities(); - disabled_wait(0x8badcccc); -} - -void __init verify_facilities(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(S390_lowcore.stfle_fac_list); i++) - S390_lowcore.stfle_fac_list[i] = 0; - asm volatile( - " stfl 0(0)\n" - : "=m" (S390_lowcore.stfl_fac_list)); - S390_lowcore.stfle_fac_list[0] = (u64)S390_lowcore.stfl_fac_list << 32; - if (S390_lowcore.stfl_fac_list & 0x01000000) { - register unsigned long reg0 asm("0") = ARRAY_SIZE(als) - 1; - - asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ - : "+d" (reg0) - : "a" (&S390_lowcore.stfle_fac_list) - : "memory", "cc"); - } - for (i = 0; i < ARRAY_SIZE(als); i++) { - if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) - facility_mismatch(); - } -} diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 961abfac2c5f..472fa2f1a4a5 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void verify_facilities(void); void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S deleted file mode 100644 index 5c42f16a54c4..000000000000 --- a/arch/s390/kernel/head.S +++ /dev/null @@ -1,339 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright IBM Corp. 1999, 2010 - * - * Author(s): Hartmut Penner - * Martin Schwidefsky - * Rob van der Heij - * Heiko Carstens - * - * There are 5 different IPL methods - * 1) load the image directly into ram at address 0 and do an PSW restart - * 2) linload will load the image from address 0x10000 to memory 0x10000 - * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated) - * 3) generate the tape ipl header, store the generated image on a tape - * and ipl from it - * In case of SL tape you need to IPL 5 times to get past VOL1 etc - * 4) generate the vm reader ipl header, move the generated image to the - * VM reader (use option NOH!) and do a ipl from reader (VM only) - * 5) direct call of start by the SALIPL loader - * We use the cpuid to distinguish between VM and native ipl - * params for kernel are pushed to 0x10400 (see setup.h) - * - */ - -#include -#include -#include -#include -#include -#include - -#define ARCH_OFFSET 4 - -__HEAD - -#define IPL_BS 0x730 - .org 0 - .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded - .long 0x02000018,0x60000050 # by ipl to addresses 0-23. - .long 0x02000068,0x60000050 # (a PSW and two CCWs). - .fill 80-24,1,0x40 # bytes 24-79 are discarded !! 
- .long 0x020000f0,0x60000050 # The next 160 byte are loaded - .long 0x02000140,0x60000050 # to addresses 0x18-0xb7 - .long 0x02000190,0x60000050 # They form the continuation - .long 0x020001e0,0x60000050 # of the CCW program started - .long 0x02000230,0x60000050 # by ipl and load the range - .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image - .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730 - .long 0x02000320,0x60000050 # in memory. At the end of - .long 0x02000370,0x60000050 # the channel program the PSW - .long 0x020003c0,0x60000050 # at location 0 is loaded. - .long 0x02000410,0x60000050 # Initial processing starts - .long 0x02000460,0x60000050 # at 0x200 = iplstart. - .long 0x020004b0,0x60000050 - .long 0x02000500,0x60000050 - .long 0x02000550,0x60000050 - .long 0x020005a0,0x60000050 - .long 0x020005f0,0x60000050 - .long 0x02000640,0x60000050 - .long 0x02000690,0x60000050 - .long 0x020006e0,0x20000050 - - .org 0x200 - -# -# subroutine to wait for end I/O -# -.Lirqwait: - mvc __LC_IO_NEW_PSW(16),.Lnewpsw # set up IO interrupt psw - lpsw .Lwaitpsw -.Lioint: - br %r14 - .align 8 -.Lnewpsw: - .quad 0x0000000080000000,.Lioint -.Lwaitpsw: - .long 0x020a0000,0x80000000+.Lioint - -# -# subroutine for loading cards from the reader -# -.Lloader: - la %r4,0(%r14) - la %r3,.Lorb # r2 = address of orb into r2 - la %r5,.Lirb # r4 = address of irb - la %r6,.Lccws - la %r7,20 -.Linit: - st %r2,4(%r6) # initialize CCW data addresses - la %r2,0x50(%r2) - la %r6,8(%r6) - bct 7,.Linit - - lctl %c6,%c6,.Lcr6 # set IO subclass mask - slr %r2,%r2 -.Lldlp: - ssch 0(%r3) # load chunk of 1600 bytes - bnz .Llderr -.Lwait4irq: - bas %r14,.Lirqwait - c %r1,__LC_SUBCHANNEL_ID # compare subchannel number - bne .Lwait4irq - tsch 0(%r5) - - slr %r0,%r0 - ic %r0,8(%r5) # get device status - chi %r0,8 # channel end ? - be .Lcont - chi %r0,12 # channel end + device end ? - be .Lcont - - l %r0,4(%r5) - s %r0,8(%r3) # r0/8 = number of ccws executed - mhi %r0,10 # *10 = number of bytes in ccws - lh %r3,10(%r5) # get residual count - sr %r0,%r3 # #ccws*80-residual=#bytes read - ar %r2,%r0 - - br %r4 # r2 contains the total size - -.Lcont: - ahi %r2,0x640 # add 0x640 to total size - la %r6,.Lccws - la %r7,20 -.Lincr: - l %r0,4(%r6) # update CCW data addresses - ahi %r0,0x640 - st %r0,4(%r6) - ahi %r6,8 - bct 7,.Lincr - - b .Lldlp -.Llderr: - lpsw .Lcrash - - .align 8 -.Lorb: .long 0x00000000,0x0080ff00,.Lccws -.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -.Lcr6: .long 0xff000000 -.Lloadp:.long 0,0 - .align 8 -.Lcrash:.long 0x000a0000,0x00000000 - - .align 8 -.Lccws: .rept 19 - .long 0x02600050,0x00000000 - .endr - .long 0x02200050,0x00000000 - -iplstart: - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode - bras %r13,0f - .fill 16,4,0x0 -0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam31 # switch to 31 bit addressing mode - lh %r1,__LC_SUBCHANNEL_ID # test if subchannel number - bct %r1,.Lnoload # is valid - l %r1,__LC_SUBCHANNEL_ID # load ipl subchannel number - la %r2,IPL_BS # load start address - bas %r14,.Lloader # load rest of ipl image - l %r12,.Lparm # pointer to parameter area - st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number - -# -# load parameter file from ipl device -# -.Lagain1: - l %r2,.Linitrd # ramdisk loc. is temp - bas %r14,.Lloader # load parameter file - ltr %r2,%r2 # got anything ? 
- bz .Lnopf - chi %r2,895 - bnh .Lnotrunc - la %r2,895 -.Lnotrunc: - l %r4,.Linitrd - clc 0(3,%r4),.L_hdr # if it is HDRx - bz .Lagain1 # skip dataset header - clc 0(3,%r4),.L_eof # if it is EOFx - bz .Lagain1 # skip dateset trailer - la %r5,0(%r4,%r2) - lr %r3,%r2 - la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line - mvc 0(256,%r3),0(%r4) - mvc 256(256,%r3),256(%r4) - mvc 512(256,%r3),512(%r4) - mvc 768(122,%r3),768(%r4) - slr %r0,%r0 - b .Lcntlp -.Ldelspc: - ic %r0,0(%r2,%r3) - chi %r0,0x20 # is it a space ? - be .Lcntlp - ahi %r2,1 - b .Leolp -.Lcntlp: - brct %r2,.Ldelspc -.Leolp: - slr %r0,%r0 - stc %r0,0(%r2,%r3) # terminate buffer -.Lnopf: - -# -# load ramdisk from ipl device -# -.Lagain2: - l %r2,.Linitrd # addr of ramdisk - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) - bas %r14,.Lloader # load ramdisk - st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd - ltr %r2,%r2 - bnz .Lrdcont - st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found -.Lrdcont: - l %r2,.Linitrd - - clc 0(3,%r2),.L_hdr # skip HDRx and EOFx - bz .Lagain2 - clc 0(3,%r2),.L_eof - bz .Lagain2 - -# -# reset files in VM reader -# - stidp .Lcpuid # store cpuid - tm .Lcpuid,0xff # running VM ? - bno .Lnoreset - la %r2,.Lreset - lhi %r3,26 - diag %r2,%r3,8 - la %r5,.Lirb - stsch 0(%r5) # check if irq is pending - tm 30(%r5),0x0f # by verifying if any of the - bnz .Lwaitforirq # activity or status control - tm 31(%r5),0xff # bits is set in the schib - bz .Lnoreset -.Lwaitforirq: - bas %r14,.Lirqwait # wait for IO interrupt - c %r1,__LC_SUBCHANNEL_ID # compare subchannel number - bne .Lwaitforirq - la %r5,.Lirb - tsch 0(%r5) -.Lnoreset: - b .Lnoload - -# -# everything loaded, go for it -# -.Lnoload: - l %r1,.Lstartup - br %r1 - -.Linitrd:.long _end # default address of initrd -.Lparm: .long PARMAREA -.Lstartup: .long startup -.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40 - .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6 - .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold" -.L_eof: .long 0xc5d6c600 /* C'EOF' */ -.L_hdr: .long 0xc8c4d900 /* C'HDR' */ - .align 8 -.Lcpuid:.fill 8,1,0 - -# -# startup-code at 0x10000, running in absolute addressing mode -# this is called either by the ipl loader or directly by PSW restart -# or linload or SALIPL -# - .org 0x10000 -ENTRY(startup) - j .Lep_startup_normal - .org 0x10008 -# -# This is a list of s390 kernel entry points. At address 0x1000f the number of -# valid entry points is stored. -# -# IMPORTANT: Do not change this table, it is s390 kernel ABI! 
-# - .ascii "S390EP" - .byte 0x00,0x01 -# -# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode -# - .org 0x10010 -ENTRY(startup_kdump) - j .Lep_startup_kdump -.Lep_startup_normal: - mvi __LC_AR_MODE_ID,1 # set esame flag - slr %r0,%r0 # set cpuid to zero - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,0x12 # switch to esame mode - bras %r13,0f - .fill 16,4,0x0 -0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs - sam64 # switch to 64 bit addressing mode - basr %r13,0 # get base -.LPG0: - xc 0x200(256),0x200 # partially clear lowcore - xc 0x300(256),0x300 - xc 0xe00(256),0xe00 - xc 0xf00(256),0xf00 - lctlg %c0,%c15,0x200(%r0) # initialize control registers - stcke __LC_BOOT_CLOCK - mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 - spt 6f-.LPG0(%r13) - mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - l %r15,.Lstack-.LPG0(%r13) - ahi %r15,-STACK_FRAME_OVERHEAD - brasl %r14,verify_facilities -# For uncompressed images, continue in -# arch/s390/kernel/head64.S. For compressed images, continue in -# arch/s390/boot/compressed/head.S. - jg startup_continue - -.Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) - .align 8 -6: .long 0x7fffffff,0xffffffff - -#include "head_kdump.S" - -# -# params at 10400 (setup.h) -# - .org PARMAREA - .long 0,0 # IPL_DEVICE - .long 0,0 # INITRD_START - .long 0,0 # INITRD_SIZE - .long 0,0 # OLDMEM_BASE - .long 0,0 # OLDMEM_SIZE - - .org COMMAND_LINE - .byte "root=/dev/ram0 ro" - .byte 0 - - .org 0x11000 diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 53e5cb0d6f7f..6a840bcb2715 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,7 @@ #include __HEAD - .org 0x100000 - 0x11000 # head.o ends at 0x11000 + .org 0x100000 ENTRY(startup_continue) tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? jz 0f diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S deleted file mode 100644 index 174d6959bf5b..000000000000 --- a/arch/s390/kernel/head_kdump.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * S390 kdump lowlevel functions (new kernel) - * - * Copyright IBM Corp. 
2011 - * Author(s): Michael Holzheu - */ - -#include - -#define DATAMOVER_ADDR 0x4000 -#define COPY_PAGE_ADDR 0x6000 - -#ifdef CONFIG_CRASH_DUMP - -# -# kdump entry (new kernel - not yet relocated) -# -# Note: This code has to be position independent -# - -.align 2 -.Lep_startup_kdump: - lhi %r1,2 # mode 2 = esame (dump) - sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode - sam64 # Switch to 64 bit addressing - basr %r13,0 -.Lbase: - larl %r2,.Lbase_addr # Check, if we have been - lg %r2,0(%r2) # already relocated: - clgr %r2,%r13 # - jne .Lrelocate # No : Start data mover - lghi %r2,0 # Yes: Start kdump kernel - brasl %r14,startup_kdump_relocated - -.Lrelocate: - larl %r4,startup - lg %r2,0x418(%r4) # Get kdump base - lg %r3,0x420(%r4) # Get kdump size - - larl %r10,.Lcopy_start # Source of data mover - lghi %r8,DATAMOVER_ADDR # Target of data mover - mvc 0(256,%r8),0(%r10) # Copy data mover code - - agr %r8,%r2 # Copy data mover to - mvc 0(256,%r8),0(%r10) # reserved mem - - lghi %r14,DATAMOVER_ADDR # Jump to copied data mover - basr %r14,%r14 -.Lbase_addr: - .quad .Lbase - -# -# kdump data mover code (runs at address DATAMOVER_ADDR) -# -# r2: kdump base address -# r3: kdump size -# -.Lcopy_start: - basr %r13,0 # Base -0: - lgr %r11,%r2 # Save kdump base address - lgr %r12,%r2 - agr %r12,%r3 # Compute kdump end address - - lghi %r5,0 - lghi %r10,COPY_PAGE_ADDR # Load copy page address -1: - mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp - mvc 0(256,%r5),0(%r11) # Copy new kernel to old - mvc 0(256,%r11),0(%r10) # Copy tmp to new - aghi %r11,256 - aghi %r5,256 - clgr %r11,%r12 - jl 1b - - lg %r14,.Lstartup_kdump-0b(%r13) - basr %r14,%r14 # Start relocated kernel -.Lstartup_kdump: - .long 0x00000000,0x00000000 + startup_kdump_relocated -.Lcopy_end: - -# -# Startup of kdump (relocated new kernel) -# -.align 2 -startup_kdump_relocated: - basr %r13,0 -0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel... -.align 8 -.Lrestart_psw: - .quad 0x0000000080000000,0x0000000000000000 + startup -#else -.align 2 -.Lep_startup_kdump: - larl %r13,startup_kdump_crash - lpswe 0(%r13) -.align 8 -startup_kdump_crash: - .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash -#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index f0414f52817b..31f530b8a1b3 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -19,7 +19,7 @@ OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(startup) +ENTRY(startup_continue) jiffies = jiffies_64; PHDRS { -- cgit v1.2.3-59-g8ed1b From a5802353b1ee8e4c97e5c255bed7651b924098fa Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 13 Jun 2018 06:25:36 +0200 Subject: s390/decompressor: rename entry point to startup_decompressor Rename the decompressor entry point to startup_decompressor to avoid confusion, leaving startup_continue as the entry point of the uncompressed image. Also remove obsolete comment, as the decompressor code is unconditionally called from boot/head.S now. 
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/head.S | 2 +- arch/s390/boot/head.S | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index 1c69907dba28..df8dbbc17bcc 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -15,7 +15,7 @@ #include "sizes.h" __HEAD -ENTRY(startup_continue) +ENTRY(startup_decompressor) basr %r13,0 # get base .LPG1: # setup stack diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 5c42f16a54c4..8d98463ca7d0 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -310,10 +310,7 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities -# For uncompressed images, continue in -# arch/s390/kernel/head64.S. For compressed images, continue in -# arch/s390/boot/compressed/head.S. - jg startup_continue + jg startup_decompressor .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) -- cgit v1.2.3-59-g8ed1b From 2dd26659e3167526212b7b3a18a5c747058aff1e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 15:14:51 +0200 Subject: s390/als: avoid .init.* sections usage Since als.c is the part of the decompressor only, there is no point in using init sections for code and data. That's just creating extra sections in the decompressor image. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/als.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index b6287a99a7b8..d592e0d90d9f 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -3,7 +3,6 @@ * Copyright IBM Corp. 2016 */ #include -#include #include #include #include @@ -17,9 +16,9 @@ * For temporary objects the stack (16k) should be used. 
*/ -static unsigned long als[] __initdata = { FACILITIES_ALS }; +static unsigned long als[] = { FACILITIES_ALS }; -static void __init u16_to_hex(char *str, u16 val) +static void u16_to_hex(char *str, u16 val) { int i, num; @@ -32,9 +31,9 @@ static void __init u16_to_hex(char *str, u16 val) *str = '\0'; } -static void __init print_machine_type(void) +static void print_machine_type(void) { - static char mach_str[80] __initdata = "Detected machine-type number: "; + static char mach_str[80] = "Detected machine-type number: "; char type_str[5]; struct cpuid id; @@ -45,7 +44,7 @@ static void __init print_machine_type(void) sclp_early_printk(mach_str); } -static void __init u16_to_decimal(char *str, u16 val) +static void u16_to_decimal(char *str, u16 val) { int div = 1; @@ -59,9 +58,9 @@ static void __init u16_to_decimal(char *str, u16 val) *str = '\0'; } -static void __init print_missing_facilities(void) +static void print_missing_facilities(void) { - static char als_str[80] __initdata = "Missing facilities: "; + static char als_str[80] = "Missing facilities: "; unsigned long val; char val_str[6]; int i, j, first; @@ -94,7 +93,7 @@ static void __init print_missing_facilities(void) sclp_early_printk("See Principles of Operations for facility bits\n"); } -static void __init facility_mismatch(void) +static void facility_mismatch(void) { sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); print_machine_type(); @@ -102,7 +101,7 @@ static void __init facility_mismatch(void) disabled_wait(0x8badcccc); } -void __init verify_facilities(void) +void verify_facilities(void) { int i; -- cgit v1.2.3-59-g8ed1b From a1d7d91f105413750b5c8fb6a13a8c969a8b1f81 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 15:52:01 +0200 Subject: s390/decompressor: avoid reusing uncompressed image objects Re-compile ebcdic.c and sclp_early_core.c for the decompressor, using proper decompressor CFLAGS. This also allows to potentially use instrumentation for those files when built for the main kernel image. With kbuild there is no easy way to re-compile a source file from another directory. Bypass ugly rules and Makefile meta-programming with relative path includes of original files. 
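The "relative path include" mentioned above amounts to a one-line wrapper translation unit per shared source file: because the wrapper lives under boot/ and is listed in that Makefile's obj-y, kbuild compiles it (and everything it pulls in) with the decompressor flags instead of reusing the main kernel's object. A sketch of that pattern follows; the explanatory comments are editorial, while the wrapper itself matches the diff below.

    // SPDX-License-Identifier: GPL-2.0
    /*
     * arch/s390/boot/ebcdic.c -- wrapper translation unit.
     * The real code stays in arch/s390/kernel/ebcdic.c; recompiling it via
     * this wrapper builds the shared source a second time with
     * KBUILD_CFLAGS_DECOMPRESSOR, so the copy linked into the main kernel
     * image can keep its instrumentation.
     */
    #include "../kernel/ebcdic.c"
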
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 16 ++++++++++------ arch/s390/boot/compressed/Makefile | 4 +--- arch/s390/boot/ebcdic.c | 2 ++ arch/s390/boot/sclp_early_core.c | 2 ++ drivers/s390/char/Makefile | 5 ----- 5 files changed, 15 insertions(+), 14 deletions(-) create mode 100644 arch/s390/boot/ebcdic.c create mode 100644 arch/s390/boot/sclp_early_core.c (limited to 'arch/s390') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 05e92d43d383..5cf30b732eb6 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -15,14 +15,18 @@ KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) # message if the kernel is started on a machine which is too old # ifneq ($(CC_FLAGS_MARCH),-march=z900) -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += -march=z900 -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_als.o += -march=z900 +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += -march=z900 +CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp_early_core.o += -march=z900 endif -obj-y := head.o als.o -targets := bzImage startup.a $(obj-y) +CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char + +obj-y := head.o als.o ebcdic.o sclp_early_core.o +targets := bzImage setup.a $(obj-y) subdir- := compressed OBJECTS := $(addprefix $(obj)/,$(obj-y)) diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 855f7fdb1f61..86fe47509f75 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -16,9 +16,7 @@ targets += misc.o piggy.o sizes.h head.o KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJECTS := $(objtree)/arch/s390/kernel/ebcdic.o -OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o -OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o +OBJECTS := $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c new file mode 100644 index 000000000000..7391e7d36086 --- /dev/null +++ b/arch/s390/boot/ebcdic.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ebcdic.c" diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c new file mode 100644 index 000000000000..5a19fd7020b5 --- /dev/null +++ b/arch/s390/boot/sclp_early_core.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../drivers/s390/char/sclp_early_core.c" diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 0a4c13e1e76e..c6ab34f94b1b 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -12,11 +12,6 @@ GCOV_PROFILE_sclp_early_core.o := n KCOV_INSTRUMENT_sclp_early_core.o := n UBSAN_SANITIZE_sclp_early_core.o := n -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += -march=z900 -endif - CFLAGS_sclp_early_core.o += -D__NO_FORTIFY CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_EXPOLINE) -- cgit v1.2.3-59-g8ed1b From cad5b35da9f7e429131592421c463a93703bb02e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 22:58:50 +0200 Subject: s390/decompressor: reuse lib/mem.S for mem functions Reusing arch/s390/lib/mem.S file solves a problem that 
sclp_early_core.c and its dependencies have to be compiled with -march=z900 (no need to compile compressed/misc.c with -march=z900). This also allows to avoid mem functions duplicates, makes code a bit smaller and optimized mem functions are utilized. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 4 +++- arch/s390/boot/compressed/misc.c | 37 ------------------------------------- arch/s390/boot/mem.S | 2 ++ 3 files changed, 5 insertions(+), 38 deletions(-) create mode 100644 arch/s390/boot/mem.S (limited to 'arch/s390') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 5cf30b732eb6..70b50b41eda9 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -17,6 +17,8 @@ KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) ifneq ($(CC_FLAGS_MARCH),-march=z900) AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) AFLAGS_head.o += -march=z900 +AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) +AFLAGS_mem.o += -march=z900 CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) CFLAGS_als.o += -march=z900 CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) @@ -25,7 +27,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o ebcdic.o sclp_early_core.o +obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o targets := bzImage setup.a $(obj-y) subdir- := compressed diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 511b2cc9b91a..f66ad73c205b 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -71,43 +71,6 @@ static int puts(const char *s) return 0; } -void *memset(void *s, int c, size_t n) -{ - char *xs; - - xs = s; - while (n--) - *xs++ = c; - return s; -} - -void *memcpy(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - while (n--) - *d++ = *s++; - return dest; -} - -void *memmove(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - if (d <= s) { - while (n--) - *d++ = *s++; - } else { - d += n; - s += n; - while (n--) - *--d = *--s; - } - return dest; -} - static void error(char *x) { unsigned long long psw = 0x000a0000deadbeefULL; diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S new file mode 100644 index 000000000000..b33463633f03 --- /dev/null +++ b/arch/s390/boot/mem.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../lib/mem.S" -- cgit v1.2.3-59-g8ed1b From 4560ff1386e35f51f7bafa0107fe4e0eb31db090 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 21:52:45 +0200 Subject: s390/decompressor: avoid repeating objects list in Makefile Optimize the decompressor's Makefile to have a single objects list. 
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 86fe47509f75..7cfca96b48ad 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -9,14 +9,15 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n +obj-y := head.o misc.o piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += misc.o piggy.o sizes.h head.o +targets += sizes.h $(obj-y) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJECTS := $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o +OBJECTS := $(addprefix $(obj)/,$(obj-y)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) -- cgit v1.2.3-59-g8ed1b From 78c95647e76d830e8193ac254a012b0e56d5675d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 16 May 2018 10:50:02 +0200 Subject: s390: add custom target and make path extension optional for .bss check "chkbss-target" could be now used to override default target .bss check is bound to. Objects to be checked are only path extended when needed. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/scripts/Makefile.chkbss | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index d92f2d94a5d9..fb0a11bd520c 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -8,7 +8,12 @@ define cmd_chkbss touch $@; endef -$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +chkbss-target ?= $(obj)/built-in.a +ifneq (,$(findstring /,$(chkbss))) +$(chkbss-target): $(patsubst %, %.chkbss, $(chkbss)) +else +$(chkbss-target): $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +endif %.o.chkbss: %.o $(call cmd,chkbss) -- cgit v1.2.3-59-g8ed1b From 0580bce1312f75362662881c938192856167cf7e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 16 May 2018 10:57:44 +0200 Subject: s390/decompressor: extend .bss check for early code Cover the decompressor code with no .bss usage compile time check. 
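As a rough illustration of what the compile-time check guards against (an assumption about the chkbss rule's behavior, not code from this patch): early boot objects cannot rely on .bss, which is not set up at that point and, as a later patch in this series notes, is even reused between the decompressor and the decompressed kernel. The check therefore fails the build when a covered object file ends up with data in .bss, so only explicitly initialized statics belong in this code:

  /* illustrative C only, not part of this patch */
  static int early_counter;      /* zero-initialized: placed in .bss, flagged by the check */
  static int early_flag = 1;     /* explicitly initialized: placed in .data, passes */
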
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 4 ++++ arch/s390/boot/compressed/Makefile | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 70b50b41eda9..a1ca3b805c74 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -48,3 +48,7 @@ $(obj)/startup.a: $(OBJECTS) FORCE install: $(CONFIGURE) $(obj)/bzImage sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" + +chkbss := $(OBJECTS) +chkbss-target := $(obj)/startup.a +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 7cfca96b48ad..9665131b3cb2 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -74,3 +74,7 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) $(call if_changed,ld) + +chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) +chkbss-target := $(obj)/vmlinux.bin +include $(srctree)/arch/s390/scripts/Makefile.chkbss -- cgit v1.2.3-59-g8ed1b From b8326bf52e0894004c790183228c1ec18aae7f72 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 21:59:13 +0200 Subject: s390/decompressor: allow preprocessor in piggy.o linker script Rename vmlinux.scr to vmlinux.scr.lds.S and add it to build rules to enable preprocessor for it. Also add vmlinux.scr.lds to local .gitignore Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/.gitignore | 1 + arch/s390/boot/compressed/Makefile | 4 ++-- arch/s390/boot/compressed/vmlinux.scr | 11 ----------- arch/s390/boot/compressed/vmlinux.scr.lds.S | 11 +++++++++++ 4 files changed, 14 insertions(+), 13 deletions(-) delete mode 100644 arch/s390/boot/compressed/vmlinux.scr create mode 100644 arch/s390/boot/compressed/vmlinux.scr.lds.S (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 2088cc140629..45aeb4f08752 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,4 +1,5 @@ sizes.h vmlinux vmlinux.lds +vmlinux.scr.lds vmlinux.bin.full diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 9665131b3cb2..704c1f4dd802 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -12,7 +12,7 @@ UBSAN_SANITIZE := n obj-y := head.o misc.o piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += sizes.h $(obj-y) +targets += vmlinux.scr.lds sizes.h $(obj-y) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) @@ -72,7 +72,7 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin.$(suffix-y) $(call if_changed,ld) chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr deleted file mode 100644 index 42a242597f34..000000000000 --- 
a/arch/s390/boot/compressed/vmlinux.scr +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -SECTIONS -{ - .rodata.compressed : { - input_len = .; - LONG(input_data_end - input_data) input_data = .; - *(.data) - output_len = . - 4; - input_data_end = .; - } -} diff --git a/arch/s390/boot/compressed/vmlinux.scr.lds.S b/arch/s390/boot/compressed/vmlinux.scr.lds.S new file mode 100644 index 000000000000..42a242597f34 --- /dev/null +++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +SECTIONS +{ + .rodata.compressed : { + input_len = .; + LONG(input_data_end - input_data) input_data = .; + *(.data) + output_len = . - 4; + input_data_end = .; + } +} -- cgit v1.2.3-59-g8ed1b From c98b6c679a3b8032a2dd5c9724f489e1b959e5a7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 13 Jun 2018 06:34:45 +0200 Subject: s390/decompressor: allow to pack uncompressed vmlinux.bin into piggy.o If none of compression methods defined, leave suffix-y empty, so that plain uncompressed vmlinux.bin could be reused instead. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 704c1f4dd802..438e1a50e0b5 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -51,12 +51,12 @@ $(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full vmlinux.bin.all-y := $(obj)/vmlinux.bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_BZIP2) := bz2 -suffix-$(CONFIG_KERNEL_LZ4) := lz4 -suffix-$(CONFIG_KERNEL_LZMA) := lzma -suffix-$(CONFIG_KERNEL_LZO) := lzo -suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_GZIP) := .gz +suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 +suffix-$(CONFIG_KERNEL_LZ4) := .lz4 +suffix-$(CONFIG_KERNEL_LZMA) := .lzma +suffix-$(CONFIG_KERNEL_LZO) := .lzo +suffix-$(CONFIG_KERNEL_XZ) := .xz $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) @@ -72,7 +72,7 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y) $(call if_changed,ld) chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) -- cgit v1.2.3-59-g8ed1b From 89b5202e81df9f0f0f0a11cf8c78bc8bfdc52a21 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 13 Jun 2018 00:00:25 +0200 Subject: s390/decompressor: support uncompressed kernel Implement uncompressed kernel support (when "None" is picked in kernel compression mode list). In that case an actual decompression code is skipped and control is passed from boot/head.S to startup_continue in kernel/head64.S. To achieve that uncompressed kernel payload is conditionally put at 0x100000 in bzImage. In reality this is very close to classic uncompressed kernel "image", but the decompressor has its own build and link process, kernel/head64.S lives at 0x100000 rather than at 0x11000, and .bss section is reused for both stages. 
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/boot/compressed/Makefile | 4 ++-- arch/s390/boot/compressed/vmlinux.lds.S | 12 ++++++++---- arch/s390/boot/compressed/vmlinux.scr.lds.S | 4 ++++ arch/s390/boot/head.S | 4 ++++ 5 files changed, 19 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index baed39772c84..3023fa00e4b5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -146,6 +146,7 @@ config S390 select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_KERNEL_UNCOMPRESSED select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 438e1a50e0b5..9d3bd7d066e9 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -9,10 +9,10 @@ KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n -obj-y := head.o misc.o piggy.o +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += vmlinux.scr.lds sizes.h $(obj-y) +targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h) KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index d43c2db12d30..b8572a81be5f 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -23,13 +23,10 @@ SECTIONS *(.text.*) _etext = . ; } - .rodata.compressed : { - *(.rodata.compressed) - } .rodata : { _rodata = . ; *(.rodata) /* read-only data */ - *(.rodata.*) + EXCLUDE_FILE (*piggy.o) *(.rodata.*) _erodata = . ; } .data : { @@ -38,6 +35,13 @@ SECTIONS *(.data.*) _edata = . ; } + startup_continue = 0x100000; +#ifdef CONFIG_KERNEL_UNCOMPRESSED + . = 0x100000; +#endif + .rodata.compressed : { + *(.rodata.compressed) + } . = ALIGN(256); .bss : { _bss = . ; diff --git a/arch/s390/boot/compressed/vmlinux.scr.lds.S b/arch/s390/boot/compressed/vmlinux.scr.lds.S index 42a242597f34..ff01d18c9222 100644 --- a/arch/s390/boot/compressed/vmlinux.scr.lds.S +++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S @@ -2,10 +2,14 @@ SECTIONS { .rodata.compressed : { +#ifndef CONFIG_KERNEL_UNCOMPRESSED input_len = .; LONG(input_data_end - input_data) input_data = .; +#endif *(.data) +#ifndef CONFIG_KERNEL_UNCOMPRESSED output_len = . - 4; input_data_end = .; +#endif } } diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 8d98463ca7d0..f09e792df495 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -310,7 +310,11 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities +#ifdef CONFIG_KERNEL_UNCOMPRESSED + jg startup_continue +#else jg startup_decompressor +#endif .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) -- cgit v1.2.3-59-g8ed1b From 6075e4ae6e729778ac687a9ffb0c0d5223a0a19e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Jun 2018 12:05:00 +0200 Subject: s390: hypfs: use ktime_get_seconds() instead of get_seconds() time_t and get_seconds() are deprecated because they will overflow on 32-bit architectures in the future. This is not a problem on 64-bit s390, but we should use proper interfaces anyway. 
Besides moving to the time64_t based interface, the CLOCK_MONOTONIC based ktime_get_seconds() is preferred for kernel internal timekeeping because it does not behave in unexpected ways during leap second changes or settimeofday() calls. Signed-off-by: Arnd Bergmann Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 06b513d192b9..c681329fdeec 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -36,7 +36,7 @@ struct hypfs_sb_info { kuid_t uid; /* uid used for files and dirs */ kgid_t gid; /* gid used for files and dirs */ struct dentry *update_file; /* file to trigger update */ - time_t last_update; /* last update time in secs since 1970 */ + time64_t last_update; /* last update, CLOCK_MONOTONIC time */ struct mutex lock; /* lock to protect update process */ }; @@ -52,7 +52,7 @@ static void hypfs_update_update(struct super_block *sb) struct hypfs_sb_info *sb_info = sb->s_fs_info; struct inode *inode = d_inode(sb_info->update_file); - sb_info->last_update = get_seconds(); + sb_info->last_update = ktime_get_seconds(); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); } @@ -179,7 +179,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) * to restart data collection in this case. */ mutex_lock(&fs_info->lock); - if (fs_info->last_update == get_seconds()) { + if (fs_info->last_update == ktime_get_seconds()) { rc = -EBUSY; goto out; } -- cgit v1.2.3-59-g8ed1b From 514211f542dd6eb877adf46d400ce351c81d95ad Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 12 Jun 2018 16:46:59 +0200 Subject: s390/setup: do not reserve the decompressor code Introduce PARMAREA_END, and use it for memblock reserve of low memory, which is used for lowcore, kdump data mover code and page buffer, early stack and parmarea. There is no need to reserve an area between PARMAREA_END and the decompressor _ehead. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 1 + arch/s390/kernel/setup.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 9c30ebe046f3..be02f0558048 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -11,6 +11,7 @@ #define PARMAREA 0x10400 +#define PARMAREA_END 0x11000 /* * Machine features detected in early.c diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d82a9ec64ea9..c637c12f9e37 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -674,12 +674,12 @@ static void __init reserve_kernel(void) #ifdef CONFIG_DMA_API_DEBUG /* * DMA_API_DEBUG code stumbles over addresses from the - * range [_ehead, _stext]. Mark the memory as reserved + * range [PARMAREA_END, _stext]. Mark the memory as reserved * so it is not used for CONFIG_DMA_API_DEBUG=y. */ memblock_reserve(0, PFN_PHYS(start_pfn)); #else - memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve(0, PARMAREA_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); #endif -- cgit v1.2.3-59-g8ed1b From f1b0a4343c4184bef9fdea8fad41f09bbd3d63ec Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 12 Jun 2018 15:42:36 +0200 Subject: s390/zcrypt: Integrate ap_asm.h into include/asm/ap.h. 
Move all the inline functions from the ap bus header file ap_asm.h into the in-kernel api header file arch/s390/include/asm/ap.h so that KVM can make use of all the low level AP functions. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ap.h | 284 +++++++++++++++++++++++++++++++++++++---- drivers/s390/crypto/ap_asm.h | 261 ------------------------------------- drivers/s390/crypto/ap_bus.c | 21 +-- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/ap_card.c | 1 - drivers/s390/crypto/ap_queue.c | 20 --- 6 files changed, 259 insertions(+), 329 deletions(-) delete mode 100644 drivers/s390/crypto/ap_asm.h (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c1bedb4c8de0..046e044a48d0 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -46,6 +46,50 @@ struct ap_queue_status { unsigned int _pad2 : 16; }; +/** + * ap_intructions_available() - Test if AP instructions are available. + * + * Returns 0 if the AP instructions are installed. + */ +static inline int ap_instructions_available(void) +{ + register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); + register unsigned long reg1 asm ("1") = -ENODEV; + register unsigned long reg2 asm ("2"); + + asm volatile( + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_tapq(): Test adjunct processor queue. + * @qid: The AP queue number + * @info: Pointer to queue descriptor + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "=d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + if (info) + *info = reg2; + return reg1; +} + /** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number @@ -54,10 +98,57 @@ struct ap_queue_status { * * Returns AP queue status structure. */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info); +static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info) +{ + if (tbit) + qid |= 1UL << 23; /* set T bit*/ + return ap_tapq(qid, info); +} +/** + * ap_pqap_rapq(): Reset adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_rapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (1UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(RAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_pqap_zapq(): Reset and zeroize adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_zapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (2UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(ZAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * struct ap_config_info - convenience struct for AP crypto + * config info as returned by the ap_qci() function. 
+ */ struct ap_config_info { unsigned int apsc : 1; /* S bit */ unsigned int apxa : 1; /* N bit */ @@ -74,50 +165,189 @@ struct ap_config_info { unsigned char _reserved4[16]; } __aligned(8); -/* - * ap_query_configuration(): Fetch cryptographic config info +/** + * ap_qci(): Get AP configuration data * - * Returns the ap configuration info fetched via PQAP(QCI). - * On success 0 is returned, on failure a negative errno - * is returned, e.g. if the PQAP(QCI) instruction is not - * available, the return value will be -EOPNOTSUPP. + * Returns 0 on success, or -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info); +static inline int ap_qci(struct ap_config_info *config) +{ + register unsigned long reg0 asm ("0") = 4UL << 24; + register unsigned long reg1 asm ("1") = -EOPNOTSUPP; + register struct ap_config_info *reg2 asm ("2") = config; + + asm volatile( + ".long 0xb2af0000\n" /* PQAP(QCI) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1) + : "d" (reg0), "d" (reg2) + : "cc", "memory"); + + return reg1; +} /* * struct ap_qirq_ctrl - convenient struct for easy invocation - * of the ap_queue_irq_ctrl() function. This struct is passed - * as GR1 parameter to the PQAP(AQIC) instruction. For details - * please see the AR documentation. + * of the ap_aqic() function. This struct is passed as GR1 + * parameter to the PQAP(AQIC) instruction. For details please + * see the AR documentation. */ struct ap_qirq_ctrl { unsigned int _res1 : 8; - unsigned int zone : 8; /* zone info */ - unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ unsigned int _res2 : 4; - unsigned int gisc : 3; /* guest isc field */ + unsigned int gisc : 3; /* guest isc field */ unsigned int _res3 : 6; - unsigned int gf : 2; /* gisa format */ + unsigned int gf : 2; /* gisa format */ unsigned int _res4 : 1; - unsigned int gisa : 27; /* gisa origin */ + unsigned int gisa : 27; /* gisa origin */ unsigned int _res5 : 1; - unsigned int isc : 3; /* irq sub class */ + unsigned int isc : 3; /* irq sub class */ }; /** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl, see above + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) * @ind: The notification indicator byte * * Returns AP queue status. + */ +static inline struct ap_queue_status ap_aqic(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) +{ + register unsigned long reg0 asm ("0") = qid | (3UL << 24); + register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; + register struct ap_queue_status reg1_out asm ("1"); + register void *reg2 asm ("2") = ind; + + asm volatile( + ".long 0xb2af0000" /* PQAP(AQIC) */ + : "=d" (reg1_out) + : "d" (reg0), "d" (reg1_in), "d" (reg2) + : "cc"); + return reg1_out; +} + +/* + * union ap_qact_ap_info - used together with the + * ap_aqic() function to provide a convenient way + * to handle the ap info needed by the qact function. + */ +union ap_qact_ap_info { + unsigned long val; + struct { + unsigned int : 3; + unsigned int mode : 3; + unsigned int : 26; + unsigned int cat : 8; + unsigned int : 8; + unsigned char ver[2]; + }; +}; + +/** + * ap_qact(): Query AP combatibility type. + * @qid: The AP queue number + * @apinfo: On input the info about the AP queue. On output the + * alternate AP queue info provided by the qact function + * in GR2 is stored in. 
* - * Control interruption on the given AP queue. - * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. + * Returns AP queue status. Check response_code field for failures. */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind); +static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, + union ap_qact_ap_info *apinfo) +{ + register unsigned long reg0 asm ("0") = qid | (5UL << 24) + | ((ifbit & 0x01) << 22); + register unsigned long reg1_in asm ("1") = apinfo->val; + register struct ap_queue_status reg1_out asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(QACT) */ + : "+d" (reg1_in), "=d" (reg1_out), "=d" (reg2) + : "d" (reg0) + : "cc"); + apinfo->val = reg2; + return reg1_out; +} + +/** + * ap_nqap(): Send message to adjunct processor queue. + * @qid: The AP queue number + * @psmid: The program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on NQAP can't happen because the L bit is 1. + * Condition code 2 on NQAP also means the send is incomplete, + * because a segment boundary was reached. The NQAP is repeated. + */ +static inline struct ap_queue_status ap_nqap(ap_qid_t qid, + unsigned long long psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* NQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5) + : "cc", "memory"); + return reg1; +} + +/** + * ap_dqap(): Receive message from adjunct processor queue. + * @qid: The AP queue number + * @psmid: Pointer to program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on DQAP means the receive has taken place + * but only partially. The response is incomplete, hence the + * DQAP is repeated. + * Condition code 2 on DQAP also means the receive is incomplete, + * this time because a segment boundary was reached. Again, the + * DQAP is repeated. + * Note that gpr2 is used by the DQAP instruction to keep track of + * any 'residual' length, in case the instruction gets interrupted. + * Hence it gets zeroed before the instruction. 
+ */ +static inline struct ap_queue_status ap_dqap(ap_qid_t qid, + unsigned long long *psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm("0") = qid | 0x80000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm("2") = 0UL; + register unsigned long reg4 asm("4") = (unsigned long) msg; + register unsigned long reg5 asm("5") = (unsigned long) length; + register unsigned long reg6 asm("6") = 0UL; + register unsigned long reg7 asm("7") = 0UL; + + + asm volatile( + "0: .long 0xb2ae0064\n" /* DQAP */ + " brc 6,0b\n" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), + "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) + : : "cc", "memory"); + *psmid = (((unsigned long long) reg6) << 32) + reg7; + return reg1; +} #endif /* _ASM_S390_AP_H_ */ diff --git a/drivers/s390/crypto/ap_asm.h b/drivers/s390/crypto/ap_asm.h deleted file mode 100644 index e22ee126c9df..000000000000 --- a/drivers/s390/crypto/ap_asm.h +++ /dev/null @@ -1,261 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky - * - * Adjunct processor bus inline assemblies. - */ - -#ifndef _AP_ASM_H_ -#define _AP_ASM_H_ - -#include - -/** - * ap_intructions_available() - Test if AP instructions are available. - * - * Returns 0 if the AP instructions are installed. - */ -static inline int ap_instructions_available(void) -{ - register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); - register unsigned long reg1 asm ("1") = -ENODEV; - register unsigned long reg2 asm ("2"); - - asm volatile( - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b, 1b) - : "+d" (reg1), "=d" (reg2) - : "d" (reg0) - : "cc"); - return reg1; -} - -/** - * ap_tapq(): Test adjunct processor queue. - * @qid: The AP queue number - * @info: Pointer to queue descriptor - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) -{ - register unsigned long reg0 asm ("0") = qid; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2"); - - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ - : "=d" (reg1), "=d" (reg2) - : "d" (reg0) - : "cc"); - if (info) - *info = reg2; - return reg1; -} - -/** - * ap_pqap_rapq(): Reset adjunct processor queue. - * @qid: The AP queue number - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_rapq(ap_qid_t qid) -{ - register unsigned long reg0 asm ("0") = qid | (1UL << 24); - register struct ap_queue_status reg1 asm ("1"); - - asm volatile( - ".long 0xb2af0000" /* PQAP(RAPQ) */ - : "=d" (reg1) - : "d" (reg0) - : "cc"); - return reg1; -} - -/** - * ap_pqap_zapq(): Reset and zeroize adjunct processor queue. - * @qid: The AP queue number - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_zapq(ap_qid_t qid) -{ - register unsigned long reg0 asm ("0") = qid | (2UL << 24); - register struct ap_queue_status reg1 asm ("1"); - - asm volatile( - ".long 0xb2af0000" /* PQAP(ZAPQ) */ - : "=d" (reg1) - : "d" (reg0) - : "cc"); - return reg1; -} - -/** - * ap_aqic(): Control interruption for a specific AP. - * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte - * - * Returns AP queue status. 
- */ -static inline struct ap_queue_status ap_aqic(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind) -{ - register unsigned long reg0 asm ("0") = qid | (3UL << 24); - register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; - register struct ap_queue_status reg1_out asm ("1"); - register void *reg2 asm ("2") = ind; - - asm volatile( - ".long 0xb2af0000" /* PQAP(AQIC) */ - : "=d" (reg1_out) - : "d" (reg0), "d" (reg1_in), "d" (reg2) - : "cc"); - return reg1_out; -} - -/** - * ap_qci(): Get AP configuration data - * - * Returns 0 on success, or -EOPNOTSUPP. - */ -static inline int ap_qci(void *config) -{ - register unsigned long reg0 asm ("0") = 4UL << 24; - register unsigned long reg1 asm ("1") = -EINVAL; - register void *reg2 asm ("2") = (void *) config; - - asm volatile( - ".long 0xb2af0000\n" /* PQAP(QCI) */ - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b, 1b) - : "+d" (reg1) - : "d" (reg0), "d" (reg2) - : "cc", "memory"); - - return reg1; -} - -/* - * union ap_qact_ap_info - used together with the - * ap_aqic() function to provide a convenient way - * to handle the ap info needed by the qact function. - */ -union ap_qact_ap_info { - unsigned long val; - struct { - unsigned int : 3; - unsigned int mode : 3; - unsigned int : 26; - unsigned int cat : 8; - unsigned int : 8; - unsigned char ver[2]; - }; -}; - -/** - * ap_qact(): Query AP combatibility type. - * @qid: The AP queue number - * @apinfo: On input the info about the AP queue. On output the - * alternate AP queue info provided by the qact function - * in GR2 is stored in. - * - * Returns AP queue status. Check response_code field for failures. - */ -static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, - union ap_qact_ap_info *apinfo) -{ - register unsigned long reg0 asm ("0") = qid | (5UL << 24) - | ((ifbit & 0x01) << 22); - register unsigned long reg1_in asm ("1") = apinfo->val; - register struct ap_queue_status reg1_out asm ("1"); - register unsigned long reg2 asm ("2"); - - asm volatile( - ".long 0xb2af0000" /* PQAP(QACT) */ - : "+d" (reg1_in), "=d" (reg1_out), "=d" (reg2) - : "d" (reg0) - : "cc"); - apinfo->val = reg2; - return reg1_out; -} - -/** - * ap_nqap(): Send message to adjunct processor queue. - * @qid: The AP queue number - * @psmid: The program supplied message identifier - * @msg: The message text - * @length: The message length - * - * Returns AP queue status structure. - * Condition code 1 on NQAP can't happen because the L bit is 1. - * Condition code 2 on NQAP also means the send is incomplete, - * because a segment boundary was reached. The NQAP is repeated. - */ -static inline struct ap_queue_status ap_nqap(ap_qid_t qid, - unsigned long long psmid, - void *msg, size_t length) -{ - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = (unsigned long) msg; - register unsigned long reg3 asm ("3") = (unsigned long) length; - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; - - asm volatile ( - "0: .long 0xb2ad0042\n" /* NQAP */ - " brc 2,0b" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) - : "d" (reg4), "d" (reg5) - : "cc", "memory"); - return reg1; -} - -/** - * ap_dqap(): Receive message from adjunct processor queue. - * @qid: The AP queue number - * @psmid: Pointer to program supplied message identifier - * @msg: The message text - * @length: The message length - * - * Returns AP queue status structure. 
- * Condition code 1 on DQAP means the receive has taken place - * but only partially. The response is incomplete, hence the - * DQAP is repeated. - * Condition code 2 on DQAP also means the receive is incomplete, - * this time because a segment boundary was reached. Again, the - * DQAP is repeated. - * Note that gpr2 is used by the DQAP instruction to keep track of - * any 'residual' length, in case the instruction gets interrupted. - * Hence it gets zeroed before the instruction. - */ -static inline struct ap_queue_status ap_dqap(ap_qid_t qid, - unsigned long long *psmid, - void *msg, size_t length) -{ - register unsigned long reg0 asm("0") = qid | 0x80000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm("2") = 0UL; - register unsigned long reg4 asm("4") = (unsigned long) msg; - register unsigned long reg5 asm("5") = (unsigned long) length; - register unsigned long reg6 asm("6") = 0UL; - register unsigned long reg7 asm("7") = 0UL; - - - asm volatile( - "0: .long 0xb2ae0064\n" /* DQAP */ - " brc 6,0b\n" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), - "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) - : : "cc", "memory"); - *psmid = (((unsigned long long) reg6) << 32) + reg7; - return reg1; -} - -#endif /* _AP_ASM_H_ */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 35a0c2b52f82..c0a6723be54b 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -36,7 +36,6 @@ #include #include "ap_bus.h" -#include "ap_asm.h" #include "ap_debug.h" /* @@ -174,24 +173,6 @@ static inline int ap_qact_available(void) return 0; } -/** - * ap_test_queue(): Test adjunct processor queue. - * @qid: The AP queue number - * @tbit: Test facilities bit - * @info: Pointer to queue descriptor - * - * Returns AP queue status structure. - */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info) -{ - if (tbit) - qid |= 1UL << 23; /* set T bit*/ - return ap_tapq(qid, info); -} -EXPORT_SYMBOL(ap_test_queue); - /* * ap_query_configuration(): Fetch cryptographic config info * @@ -200,7 +181,7 @@ EXPORT_SYMBOL(ap_test_queue); * is returned, e.g. if the PQAP(QCI) instruction is not * available, the return value will be -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info) +static inline int ap_query_configuration(struct ap_config_info *info) { if (!ap_configuration_available()) return -EOPNOTSUPP; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6a273c5ebca5..936541937e15 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -15,6 +15,7 @@ #include #include +#include #include #define AP_DEVICES 256 /* Number of AP devices. */ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 2c726df210f6..c13e43292cb7 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -14,7 +14,6 @@ #include #include "ap_bus.h" -#include "ap_asm.h" /* * AP card related attributes. diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index ba3a2e13b0eb..e365171fe28f 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -14,26 +14,6 @@ #include #include "ap_bus.h" -#include "ap_asm.h" - -/** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. - * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte - * - * Returns AP queue status. - * - * Control interruption on the given AP queue. 
- * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. - */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind) -{ - return ap_aqic(qid, qirqctrl, ind); -} -EXPORT_SYMBOL(ap_queue_irq_ctrl); /** * ap_queue_enable_interruption(): Enable interruption on an AP queue. -- cgit v1.2.3-59-g8ed1b From 6b2ddf33baec23dace85bd647e3fc4ac070963e8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 17 Jun 2018 00:30:43 +0200 Subject: s390/extmem: fix gcc 8 stringop-overflow warning arch/s390/mm/extmem.c: In function '__segment_load': arch/s390/mm/extmem.c:436:2: warning: 'strncat' specified bound 7 equals source length [-Wstringop-overflow=] strncat(seg->res_name, " (DCSS)", 7); What gcc complains about here is the misuse of strncat function, which in this case does not limit a number of bytes taken from "src", so it is in the end the same as strcat(seg->res_name, " (DCSS)"); Keeping in mind that a res_name is 15 bytes, strncat in this case would overflow the buffer and write 0 into alignment byte between the fields in the struct. To avoid that increasing res_name size to 16, and reusing strlcat. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 6ad15d3fab81..84111a43ea29 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -80,7 +80,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; - char res_name[15]; + char res_name[16]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long memcpy(&seg->res_name, seg->dcss_name, 8); EBCASC(seg->res_name, 8); seg->res_name[8] = '\0'; - strncat(seg->res_name, " (DCSS)", 7); + strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; rc = seg->vm_segtype; if (rc == SEG_TYPE_SC || -- cgit v1.2.3-59-g8ed1b From f6ea4d25e95972f482e14cdcefaa0789e6158d35 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 18 Jun 2018 16:50:42 +0200 Subject: s390: fix gcc 8 stringop-truncation warnings in proc handlers arch/s390/kernel/topology.c:591:3: warning: 'strncpy' output truncated before terminating nul copying 2 bytes from a string of the same length [-Wstringop-truncation] strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", ARRAY_SIZE(buf)); arch/s390/appldata/appldata_base.c:326:3: warning: 'strncpy' output truncated before terminating nul copying 2 bytes from a string of the same length [-Wstringop-truncation] strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); arch/s390/appldata/appldata_base.c:217:3: warning: 'strncpy' output truncated before terminating nul copying 2 bytes from a string of the same length [-Wstringop-truncation] strncpy(buf, appldata_timer_active ? "1\n" : "0\n", ARRAY_SIZE(buf)); To avoid the warning, just reuse memcpy. 
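A minimal reduction of the warning and the fix (illustrative C, not taken from the kernel sources; "active" stands in for the respective sysctl state): the buffer holds exactly the two payload bytes, so strncpy() can never append the terminating NUL and gcc 8 reports the truncation. Because the length is measured afterwards with strnlen() bounded by the buffer size, copying the two bytes with memcpy() is all that is needed:

  #include <string.h>

  void encode_flag(int active)
  {
          char buf[2];

          /* gcc 8 warns: output truncated before terminating nul (bound equals strlen of source) */
          strncpy(buf, active ? "1\n" : "0\n", sizeof(buf));
          /* the replacement: copy exactly the two payload bytes, no NUL needed here */
          memcpy(buf, active ? "1\n" : "0\n", sizeof(buf));
  }
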
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 5 ++--- arch/s390/kernel/topology.c | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee6a9c387c87..25c1399b0787 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -214,8 +214,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write, return 0; } if (!write) { - strncpy(buf, appldata_timer_active ? "1\n" : "0\n", - ARRAY_SIZE(buf)); + memcpy(buf, appldata_timer_active ? "1\n" : "0\n", ARRAY_SIZE(buf)); len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; @@ -323,7 +322,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write, return 0; } if (!write) { - strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); + memcpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4b6e0397f66d..33ebb423c613 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -588,8 +588,7 @@ static int topology_ctl_handler(struct ctl_table *ctl, int write, return 0; } if (!write) { - strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", - ARRAY_SIZE(buf)); + memcpy(buf, topology_is_enabled() ? "1\n" : "0\n", ARRAY_SIZE(buf)); len = strnlen(buf, ARRAY_SIZE(buf)); if (len > *lenp) len = *lenp; -- cgit v1.2.3-59-g8ed1b From ad82a928eb58471adb2dec2001f5fbe57e5ee4b5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 18 Jun 2018 17:51:29 +0200 Subject: s390/perf: fix gcc 8 array-bounds warning arch/s390/kernel/perf_regs.c:36:19: warning: array subscript 16 is above array bounds of 'long unsigned int[16]' [-Warray-bounds] return regs->gprs[idx]; gcc tries to be smart here and since there is a condition: if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) return regs->gprs[idx]; which covers all possible array subscripts, it gives the warning for the last function return statement: return regs->gprs[idx]; which in presence of that condition does not really make sense and should be replaced with "return 0;" Also move WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX) to the end of the function. 
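The compiler's reasoning roughly corresponds to the following reduction (hypothetical C, not the perf_regs.c code): once the early branch returns for every in-range index, gcc 8 concludes that the trailing array access can only be reached with an out-of-range index and emits -Warray-bounds for it, which is why the patch below replaces that access with "return 0" and moves the WARN_ON_ONCE next to it:

  static unsigned long gprs[16];

  unsigned long reg_value(int idx)
  {
          if (idx >= 0 && idx <= 15)
                  return gprs[idx];
          /* ... other, non-gpr indexes would be handled here ... */
          return gprs[idx];       /* gcc 8: array subscript 16 is above array bounds */
  }
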
Reviewed-by: Hendrik Brueckner Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_regs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 54e2d634b849..4352a504f235 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -12,9 +12,6 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) { freg_t fp; - if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) - return 0; - if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) return regs->gprs[idx]; @@ -33,7 +30,8 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) if (idx == PERF_REG_S390_PC) return regs->psw.addr; - return regs->gprs[idx]; + WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX); + return 0; } #define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) -- cgit v1.2.3-59-g8ed1b From 4778dc08ef99825d2931d44888fda8d49319d93a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Jun 2018 14:46:57 +0200 Subject: s390/decompressor: discard ___kcrctab section ___kcrctab section is not used during the decompressor phase and could be discarded to save the memory. It is currently generated due to lib/mem.S usage, which exports few symbols. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index b8572a81be5f..93ab95cf16fb 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -58,5 +58,6 @@ SECTIONS *(.eh_frame) *(__ex_table) *(*__ksymtab*) + *(___kcrctab*) } } -- cgit v1.2.3-59-g8ed1b From 94cbc0ea163f58ec3ce167a1ccbdf55ff1e88111 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Jun 2018 14:58:46 +0200 Subject: s390: put expoline execute-trampolines into .text section Currently none of the vmlinux linker script patterns in .text section match expoline execute-trampolines, and they end up as separate sections: .text.__s390_indirect_jump_r1, .text.__s390x_indirect_jump_r1use_r9, .text.__s390x_indirect_branch_4_1use_6, ... Add a pattern to match them all. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 31f530b8a1b3..51adfe251bcd 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -47,6 +47,7 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) } :text = 0x0700 -- cgit v1.2.3-59-g8ed1b From 8e533fdd092598db73312af801dbddea8cab6f9c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Jun 2018 16:59:26 +0200 Subject: s390: remove unused _ehead symbol Since startup code now reserves memory ranges [0, PARMAREA_END] and [_stext, ] _ehead symbol is not used and could be cleaned up. 
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sections.h | 2 -- arch/s390/kernel/head64.S | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 54f81f8ed662..724faede8ac5 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,4 @@ #include -extern char _ehead[]; - #endif diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 6a840bcb2715..161b795d3109 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -87,8 +87,6 @@ ENTRY(startup_continue) .Llinkage_stack: .long 0,0,0x89000000,0,0,0,0x8a000000,0 -ENTRY(_ehead) - # # startup-code, running in absolute addressing mode # -- cgit v1.2.3-59-g8ed1b From 183ab05ff285555c56b7f00bb4fd52d80a9513fb Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Jun 2018 17:10:14 +0200 Subject: s390: get rid of the first mb of uncompressed image Instead of generating uncompressed kernel image starting at 0, filling first mb with zeros (with ".org 0x100000") and then trimming it off from vmlinux.bin before compression, simply generate a kernel image starting from 0x100000. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/Makefile | 15 ++++----------- arch/s390/kernel/head64.S | 1 - arch/s390/kernel/vmlinux.lds.S | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 9d3bd7d066e9..04609478d18b 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -23,16 +23,12 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) $(call if_changed,ld) -TRIM_HEAD_SIZE := 0x100000 - -sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p' +# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p' quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -quiet_cmd_trim_head = TRIM $@ - cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@ - $(obj)/sizes.h: vmlinux $(call if_changed,sizes) @@ -42,13 +38,10 @@ $(obj)/head.o: $(obj)/sizes.h CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h -OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S -$(obj)/vmlinux.bin.full: vmlinux +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux $(call if_changed,objcopy) -$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full - $(call if_changed,trim_head) - vmlinux.bin.all-y := $(obj)/vmlinux.bin suffix-$(CONFIG_KERNEL_GZIP) := .gz diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 161b795d3109..6f43faeaaf32 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,6 @@ #include __HEAD - .org 0x100000 ENTRY(startup_continue) tm __LC_STFLE_FAC_LIST+5,0x80 # LPP available ? jz 0f diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 51adfe251bcd..35f7ca8a1685 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -30,7 +30,7 @@ PHDRS { SECTIONS { - . = 0x00000000; + . 
= 0x100000; .text : { /* Text and read-only data */ HEAD_TEXT -- cgit v1.2.3-59-g8ed1b From c949786450d6e19ef21f0837488a3dc1554131d6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Jun 2018 17:32:53 +0200 Subject: s390: correct _stext offset Avoid unnecessary rewrite of psw and merge _stext into startup_continue. This allows to move _stext definition to vmlinux.lds.S, where _etext is also defined and set _stext to the actual beginning of .text at 0x100000. This fixes the problem with setting the last .text page as not-executable due to vmem_map_init relying on page alinged _stext and _etext. Fixes: bd79d6632958 ("s390/decompressor: trim the kernel image up to 1M") Reported-by: Nils Hoppmann Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head64.S | 40 +++++++++++++++------------------------- arch/s390/kernel/vmlinux.lds.S | 1 + 2 files changed, 16 insertions(+), 25 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 6f43faeaaf32..6d14ad42ba88 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -48,11 +48,23 @@ ENTRY(startup_continue) # Early machine initialization and detection functions. # brasl %r14,startup_init - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... + +# check control registers + stctg %c0,%c15,0(%r15) + oi 6(%r15),0x60 # enable sigp emergency & external call + oi 4(%r15),0x10 # switch on low address proctection + lctlg %c0,%c15,0(%r15) + + lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess + brasl %r14,start_kernel # go to C code +# +# We returned from start_kernel ?!? PANIK +# + basr %r13,0 + lpswe .Ldw-.(%r13) # load disabled wait psw + .align 16 .LPG1: -.Lentry:.quad 0x0000000180000000,_stext .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table @@ -85,27 +97,5 @@ ENTRY(startup_continue) .endr .Llinkage_stack: .long 0,0,0x89000000,0,0,0,0x8a000000,0 - -# -# startup-code, running in absolute addressing mode -# -ENTRY(_stext) - basr %r13,0 # get base -.LPG3: -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code -# -# We returned from start_kernel ?!? PANIK -# - basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw - - .align 8 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 35f7ca8a1685..b3e95b9d99de 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -31,6 +31,7 @@ PHDRS { SECTIONS { . = 0x100000; + _stext = .; /* Start of text section */ .text : { /* Text and read-only data */ HEAD_TEXT -- cgit v1.2.3-59-g8ed1b From f56506ef3061f3362bb35a396f6951f7035b8294 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Jun 2018 17:53:40 +0200 Subject: s390: move _text to an actual .text start Since the uncompressed image .text section starts at 0x100000 now there is no need to redefine _text to something else to make perf happy. 
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vmlinux.lds.S | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b3e95b9d99de..b43f8d33a369 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -34,13 +34,8 @@ SECTIONS _stext = .; /* Start of text section */ .text : { /* Text and read-only data */ + _text = .; HEAD_TEXT - /* - * E.g. perf doesn't like symbols starting at address zero, - * therefore skip the initial PSW and channel program located - * at address zero and let _text start at 0x200. - */ - _text = 0x200; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT -- cgit v1.2.3-59-g8ed1b From 2d6f74f797f26e14bdd672b3547ad29199bbb6b8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 28 Jun 2018 07:39:24 +0200 Subject: s390/decompressor: correct EXCLUDE_FILE construct The following linker construct is problematic with linkers of binutils < 2.28: EXCLUDE_FILE (*piggy.o) *(.rodata.*) from 8f1732fc2a11dc of binutils: "though the linker accepts this without complaint the EXCLUDE_FILE part is silently ignored and has no effect." Silent ignoring of EXCLUDE_FILE construct made .rodata.compressed be part of .rodata, and in case of .rodata.compressed following some unaligned data, input_len would also become unaligned. from arch/s390/boot/compressed/vmlinux.map: .rodata.compressed 0x0000000000012fea 0x4d57e7 arch/s390/boot/compressed/piggy.o 0x0000000000012fea input_len 0x0000000000012fee input_data input_len is later used here: arch/s390/boot/compressed/misc.c:113 __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); asm generated by gcc looks like: .loc 3 113 0 egfrl %r11,input_len from what assembler generates invalid (the second operand must be aligned on a doubleword boundary): 0x00000000000129b4 <+148>: c4 bc 00 00 03 1b lgfrl %r11,0x12fea hence specification exception is recognized. To avoid an issue use EXCLUDE_FILE construct which is recognized by older linkers (since at least binutils-2_11) *(EXCLUDE_FILE (*piggy.o) .rodata.compressed) Also ensure that .rodata.compressed is at least doubleword aligned. Fixes: 89b5202e81df ("s390/decompressor: support uncompressed kernel") Reported-by: Halil Pasic Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/vmlinux.lds.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 93ab95cf16fb..b16ac8b3c439 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -26,7 +26,7 @@ SECTIONS .rodata : { _rodata = . ; *(.rodata) /* read-only data */ - EXCLUDE_FILE (*piggy.o) *(.rodata.*) + *(EXCLUDE_FILE (*piggy.o) .rodata.compressed) _erodata = . ; } .data : { @@ -38,6 +38,8 @@ SECTIONS startup_continue = 0x100000; #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; +#else + . 
= ALIGN(8); #endif .rodata.compressed : { *(.rodata.compressed) -- cgit v1.2.3-59-g8ed1b From 3ad4c05a53b5f816fb0480ca0c55cc7957bbf5f1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 28 Jun 2018 13:12:27 +0200 Subject: s390: align struct lowcore to double page size Aligning struct lowcore to double page size allows to get rid of this gcc warning: In file included from ./arch/s390/include/asm/setup.h:56, from ./arch/s390/include/asm/page.h:36, from ./arch/s390/include/asm/user.h:11, from ./include/linux/user.h:1, from ./include/linux/elfcore.h:5, from ./include/linux/crash_core.h:6, from ./include/linux/kexec.h:18, from arch/s390/purgatory/purgatory.c:10: ./arch/s390/include/asm/lowcore.h:189:1: warning: alignment 1 of 'struct lowcore' is less than 8 [-Wpacked-not-aligned] } __packed; Acked-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5bc888841eaf..406d940173ab 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -185,7 +185,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ -} __packed; +} __packed __aligned(8192); #define S390_lowcore (*((struct lowcore *) 0)) -- cgit v1.2.3-59-g8ed1b From 627c9b62058ebfd368b7411e7ea5a27df48486c8 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 25 Jun 2018 13:59:35 +0200 Subject: s390/boot: block uncompressed vmlinux booting attempts Since the plain vmlinux ELF file no longer carries all necessary parts for starting up (like the entry point and decompressor), add a check which would block boot process and encourage users to use bzImage or arch/s390/boot/compressed/vmlinux instead. The check relies on s390 linux entry point ABI definition, which is only present in bzImage and arch/s390/boot/compressed/vmlinux. Reported-by: Guenter Roeck Tested-by: Guenter Roeck Acked-by: Cornelia Huck Acked-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/head.S | 4 ++-- arch/s390/include/asm/setup.h | 3 ++- arch/s390/kernel/early.c | 12 ++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index f09e792df495..f721913b73f1 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -272,14 +272,14 @@ iplstart: .org 0x10000 ENTRY(startup) j .Lep_startup_normal - .org 0x10008 + .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of # valid entry points is stored. # # IMPORTANT: Do not change this table, it is s390 kernel ABI! 
# - .ascii "S390EP" + .ascii EP_STRING .byte 0x00,0x01 # # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index be02f0558048..1d66016f4170 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -9,7 +9,8 @@ #include #include - +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" #define PARMAREA 0x10400 #define PARMAREA_END 0x11000 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 827699eb48fa..5b28b434f8a1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -331,8 +331,20 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } +static void __init check_image_bootable(void) +{ + if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING))) + return; + + sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); + sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); + sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); + disabled_wait(0xbadb007); +} + void __init startup_init(void) { + check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); -- cgit v1.2.3-59-g8ed1b From d053d639f00c58122ac0e3fe3a08aa8dc2a08ce6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 29 Jun 2018 17:32:09 +0200 Subject: s390/tools: fix gcc 8 stringop-truncation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace strncpy which has been used to copy a substring into buf without NUL-termination with memcpy to avoid the following gcc 8 warnings: arch/s390/tools/gen_opcode_table.c: In function ‘add_to_group.isra.4’: arch/s390/tools/gen_opcode_table.c:260:2: warning: ‘strncpy’ output may be truncated copying 2 bytes from a string of length 19 [-Wstringop-truncation] strncpy(group->opcode, insn->opcode, 2); In function ‘print_opcode_table’, inlined from ‘main’ at arch/s390/tools/gen_opcode_table.c:333:2: arch/s390/tools/gen_opcode_table.c:286:4: warning: ‘strncpy’ output may be truncated copying 2 bytes from a string of length 19 [-Wstringop-truncation] strncpy(opcode, insn->opcode, 2); Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/tools/gen_opcode_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index 259aa0680d1a..a1bc02b29c81 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -257,7 +257,7 @@ static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) if (!desc->group) exit(EXIT_FAILURE); group = &desc->group[desc->nr_groups - 1]; - strncpy(group->opcode, insn->opcode, 2); + memcpy(group->opcode, insn->opcode, 2); group->type = insn->type; group->offset = offset; group->count = 1; @@ -283,7 +283,7 @@ static void print_opcode_table(struct gen_opcode *desc) continue; add_to_group(desc, insn, offset); if (strncmp(opcode, insn->opcode, 2)) { - strncpy(opcode, insn->opcode, 2); + memcpy(opcode, insn->opcode, 2); printf("\t/* %.2s */ \\\n", opcode); } print_opcode(insn, offset); -- cgit v1.2.3-59-g8ed1b From 9f35b818a2f90fb6cb291aa0c9f835d4f0974a9a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Jul 2018 10:54:02 +0200 Subject: s390/sysinfo: add missing #ifdef CONFIG_PROC_FS 
Get rid of this compile warning for !PROC_FS: CC arch/s390/kernel/sysinfo.o arch/s390/kernel/sysinfo.c:275:12: warning: 'sysinfo_show' defined but not used [-Wunused-function] static int sysinfo_show(struct seq_file *m, void *v) Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sysinfo.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 54f5496913fa..12f80d1f0415 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -59,6 +59,8 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) } EXPORT_SYMBOL(stsi); +#ifdef CONFIG_PROC_FS + static bool convert_ext_name(unsigned char encoding, char *name, size_t len) { switch (encoding) { @@ -301,6 +303,8 @@ static int __init sysinfo_create_proc(void) } device_initcall(sysinfo_create_proc); +#endif /* CONFIG_PROC_FS */ + /* * Service levels interface. */ -- cgit v1.2.3-59-g8ed1b From b887b1b106654141ab034061ef0cc8d471ebc683 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Jul 2018 11:31:48 +0200 Subject: s390/decompressor: avoid constant startup.a rebuilds Correct merging error which replaced startup.a in targets list with non-existing setup.a. Due to missing startup.a in targets list if_changed triggered startup.a rebuild unconditionally. Fixes: 3e200c54438d ("s390/decompressor: avoid reusing uncompressed image objects") Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index a1ca3b805c74..e15baa7b8e0b 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -28,7 +28,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o -targets := bzImage setup.a $(obj-y) +targets := bzImage startup.a $(obj-y) subdir- := compressed OBJECTS := $(addprefix $(obj)/,$(obj-y)) -- cgit v1.2.3-59-g8ed1b From 7e0363b469f5d2ad00234497aad65c7b46db42c0 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 28 Jun 2018 19:21:36 +0200 Subject: s390/decompressor: avoid packing *.o.chkbss files into startup.a startup.a build rule packs a list of prerequisites into archive. That didn't take into account extra prerequisites added by chkbss, so that zero length *.o.chkbss files were also packed into the archive. To avoid that filter only real object from prerequisites list. 
Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e15baa7b8e0b..9e6668ee93de 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -40,7 +40,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ quiet_cmd_ar = AR $@ - cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter-out FORCE, $^) + cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter $(OBJECTS), $^) $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) -- cgit v1.2.3-59-g8ed1b From 251987a8025da049f42001bdb21250f79644ede3 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Jul 2018 11:32:51 +0200 Subject: s390/build: add *.o.chkbss files to targets list Adding *.o.chkbss files to targets list makes sure that the kbuild is aware of them and removes them during make clean. Also remove *.o.chkbss file before an actual check, to avoid having stale *.o.chkbss file left even if the check is failed. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/scripts/Makefile.chkbss | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index fb0a11bd520c..9bba2c14e0ca 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -2,6 +2,7 @@ quiet_cmd_chkbss = CHKBSS $< define cmd_chkbss + rm -f $@; \ if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ echo "error: $< .bss section is not empty" >&2; exit 1; \ fi; \ @@ -10,10 +11,13 @@ endef chkbss-target ?= $(obj)/built-in.a ifneq (,$(findstring /,$(chkbss))) -$(chkbss-target): $(patsubst %, %.chkbss, $(chkbss)) +chkbss-files := $(patsubst %, %.chkbss, $(chkbss)) else -$(chkbss-target): $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +chkbss-files := $(patsubst %, $(obj)/%.chkbss, $(chkbss)) endif +$(chkbss-target): $(chkbss-files) +targets += $(notdir $(chkbss-files)) + %.o.chkbss: %.o $(call cmd,chkbss) -- cgit v1.2.3-59-g8ed1b From ee6d777d3e93670af801a7288ee6281626f4ac2d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 28 Jun 2018 19:22:21 +0200 Subject: s390/decompressor: support extra debug flags Since decompressor cflags and aflags are made from scratch add extra debug flags when CONFIG_DEBUG_INFO is enabled. Also adds support for CONFIG_DEBUG_INFO_DWARF4 option. 
Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index d25fbc170164..eee6703093c3 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -16,12 +16,16 @@ KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 +aflags_dwarf := -Wa,-gdwarf-2 KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ +KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x STACK_SIZE := 16384 CHECKFLAGS += -D__s390__ -D__s390x__ -- cgit v1.2.3-59-g8ed1b From 0ac942826b3d900b7054f71bb6a15428441458d2 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 8 Apr 2016 17:55:19 +0200 Subject: s390/pci: add fmt3 fmb Add support for format 3 function measurement blocks. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 5 +++++ arch/s390/pci/pci_debug.c | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 94f8db468c9b..10fe982f2b4b 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -51,6 +51,10 @@ struct zpci_fmb_fmt2 { u64 max_work_units; }; +struct zpci_fmb_fmt3 { + u64 tx_bytes; +}; + struct zpci_fmb { u32 format : 8; u32 fmt_ind : 24; @@ -66,6 +70,7 @@ struct zpci_fmb { struct zpci_fmb_fmt0 fmt0; struct zpci_fmb_fmt1 fmt1; struct zpci_fmb_fmt2 fmt2; + struct zpci_fmb_fmt3 fmt3; }; } __packed __aligned(128); diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index b482e95b6249..57f7cdac70a3 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -48,6 +48,10 @@ static char *pci_fmt2_names[] = { "Maximum work units", }; +static char *pci_fmt3_names[] = { + "Transmitted bytes", +}; + static char *pci_sw_names[] = { "Allocated pages", "Mapped pages", @@ -112,6 +116,10 @@ static int pci_perf_show(struct seq_file *m, void *v) pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names), &zdev->fmb->fmt2.consumed_work_units); break; + case 3: + pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names), + &zdev->fmb->fmt3.tx_bytes); + break; default: seq_puts(m, "Unknown format\n"); } -- cgit v1.2.3-59-g8ed1b From ad03b821fbc30395b72af438f5bb41676a5f891d Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 26 Jun 2018 12:24:30 +0200 Subject: s390/purgatory: Fix crash with expoline enabled When the kernel is built with CONFIG_EXPOLINE=y and a compiler with indirect branch mitigation enabled the purgatory crashes. The reason for that is that the macros defined for expoline are used in mem.S. These macros define new sections (.text.__s390x_indirect_*) which are marked executable. Due to the missing linker script those sections are linked to address 0, just as the .text section. 
In combination with the entry point also being at address 0 this causes the purgatory load code (kernel/kexec_file.c: kexec_purgatory_setup_sechdrs) to update the entry point twice. Thus the old kernel jumps to some 'random' address causing the crash. To fix this turn off expolines for the purgatory. There is no problem with this in this case due to the fact that the purgatory only runs once and the tlb is purged (diag 308) in the end. Fixes: 840798a1f5299 ("s390/kexec_file: Add purgatory") Cc: # 4.17 Signed-off-by: Philipp Rudo Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 1ace023cbdce..704e0f3c244a 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -23,6 +23,7 @@ KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) -- cgit v1.2.3-59-g8ed1b From c315e69308c739a43c4ebc539bedbc1ac8d79854 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Tue, 26 Jun 2018 18:24:52 +0200 Subject: s390/purgatory: Add missing FORCE to Makefile targets Without FORCE make does not detect changes only made to the command line options. So object files might not be re-built even when they should be. Fix this by adding FORCE where it is missing. Fixes: 840798a1f5299 ("s390/kexec_file: Add purgatory") Cc: # 4.17 Signed-off-by: Philipp Rudo Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/purgatory/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 704e0f3c244a..abfa8c7a6d9a 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -7,13 +7,13 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o targets += $(purgatory-y) purgatory.ro kexec-purgatory.c PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) -$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S +$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c +$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -- cgit v1.2.3-59-g8ed1b From 287d6070acbb2529e24462faffca46663885dddc Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 27 Jun 2018 12:03:43 +0200 Subject: s390/purgatory: Remove duplicate variable definitions Currently there are some variables in the purgatory (e.g. kernel_entry) which are defined twice, once in assembler- and once in c-code. The reason for this is that these variables are set during purgatory load, where sanity checks on the corresponding Elf_Sym's are made, while they are used in assembler-code. Thus adding a second definition in c-code is a handy workaround to guarantee correct Elf_Sym's are created. When the purgatory is compiled with -fcommon (default for gcc on s390) this is no problem because both symbols are merged by the linker. 
However this is not required by ISO C and when the purgatory is built with -fno-common the linker fails with errors like arch/s390/purgatory/purgatory.o:(.bss+0x18): multiple definition of `kernel_entry' arch/s390/purgatory/head.o:/.../arch/s390/purgatory/head.S:230: first defined here Thus remove the duplicate definitions and add the required size and type information to the assembler definition. Also add -fno-common to the command line options to prevent similar hacks in the future. Signed-off-by: Philipp Rudo Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/purgatory.h | 6 ------ arch/s390/purgatory/Makefile | 2 +- arch/s390/purgatory/head.S | 45 +++++++++++++++++---------------------- arch/s390/purgatory/purgatory.c | 9 -------- 4 files changed, 20 insertions(+), 42 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index 6090670df51f..e297bcfc476f 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,11 +13,5 @@ int verify_sha256_digest(void); -extern u64 kernel_entry; -extern u64 kernel_type; - -extern u64 crash_start; -extern u64 crash_size; - #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index abfa8c7a6d9a..8d61218a71aa 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -21,7 +21,7 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 660c96a05a9b..2e3707b12edd 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -243,33 +243,26 @@ gprregs: .quad 0 .endr -purgatory_sha256_digest: - .global purgatory_sha256_digest - .rept 32 /* SHA256_DIGEST_SIZE */ - .byte 0 - .endr - -purgatory_sha_regions: - .global purgatory_sha_regions - .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */ - .byte 0 - .endr - -kernel_entry: - .global kernel_entry - .quad 0 - -kernel_type: - .global kernel_type - .quad 0 - -crash_start: - .global crash_start - .quad 0 +/* Macro to define a global variable with name and size (in bytes) to be + * shared with C code. + * + * Add the .size and .type attribute to satisfy checks on the Elf_Sym during + * purgatory load. 
+ */ +.macro GLOBAL_VARIABLE name,size +\name: + .global \name + .size \name,\size + .type \name,object + .skip \size,0 +.endm -crash_size: - .global crash_size - .quad 0 +GLOBAL_VARIABLE purgatory_sha256_digest,32 +GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE +GLOBAL_VARIABLE kernel_entry,8 +GLOBAL_VARIABLE kernel_type,8 +GLOBAL_VARIABLE crash_start,8 +GLOBAL_VARIABLE crash_size,8 .align PAGE_SIZE stack: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 4e2beb3c29b7..3528e6da4e87 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -12,15 +12,6 @@ #include #include -struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; -u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; - -u64 kernel_entry; -u64 kernel_type; - -u64 crash_start; -u64 crash_size; - int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; -- cgit v1.2.3-59-g8ed1b From 5bedf8aa03c28cb8dc98bdd32a41b66d8f7d3eaa Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 24 Jun 2018 12:17:43 +0200 Subject: s390/mm: correct allocate_pgste proc_handler callback Since proc_dointvec does not perform value range control, proc_dointvec_minmax should be used to limit value range, which is clearly intended here, as the internal representation of the value: unsigned int alloc_pgste:1; In fact it currently works, since we have mm->context.alloc_pgste = page_table_allocate_pgste || ... ... since commit 23fefe119ceb5 ("s390/kvm: avoid global config of vm.alloc_pgste=1") Before that it was mm->context.alloc_pgste = page_table_allocate_pgste; which was broken. That was introduced with commit 0b46e0a3ec0d7 ("s390/kvm: remove delayed reallocation of page tables for KVM"). Fixes: 0b46e0a3ec0d7 ("s390/kvm: remove delayed reallocation of page tables for KVM") Acked-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 84bd6329a88d..6cf9c9ff2bff 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -28,7 +28,7 @@ static struct ctl_table page_table_sysctl[] = { .data = &page_table_allocate_pgste, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &page_table_allocate_pgste_min, .extra2 = &page_table_allocate_pgste_max, }, -- cgit v1.2.3-59-g8ed1b From 196851bed5221a7670024e3ad39704c6d92b2014 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 24 Jun 2018 12:28:46 +0200 Subject: s390/topology: correct topology mode proc handler Reuse proc_douintvec_minmax to simplify topology mode proc handler. This also enforces correct range of 0-1 on proc writes and correctly handles numbers starting with 0 or spaces. 
Before: $ echo 01 > /proc/sys/s390/topology $ cat /proc/sys/s390/topology 0 $ echo ' 1' > /proc/sys/s390/topology -bash: echo: write error: Invalid argument $ echo 2 > /proc/sys/s390/topology -bash: echo: write error: Invalid argument $ echo 12 > /proc/sys/s390/topology $ cat /proc/sys/s390/topology 1 After: $ echo 01 > /proc/sys/s390/topology $ cat /proc/sys/s390/topology 1 $ echo ' 1' > /proc/sys/s390/topology $ cat /proc/sys/s390/topology 1 $ echo 2 > /proc/sys/s390/topology -bash: echo: write error: Invalid argument $ echo 12 > /proc/sys/s390/topology -bash: echo: write error: Invalid argument $ echo ' 0' > /proc/sys/s390/topology $ cat /proc/sys/s390/topology 0 Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 33ebb423c613..e8184a15578a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -579,40 +579,33 @@ early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; + int enabled = topology_is_enabled(); int new_mode; - char buf[2]; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &enabled, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - memcpy(buf, topology_is_enabled() ? "1\n" : "0\n", ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - if (buf[0] != '0' && buf[0] != '1') - return -EINVAL; mutex_lock(&smp_cpu_state_mutex); - new_mode = topology_get_mode(buf[0] == '1'); + new_mode = topology_get_mode(enabled); if (topology_mode != new_mode) { topology_mode = new_mode; topology_schedule_update(); } mutex_unlock(&smp_cpu_state_mutex); topology_flush_work(); -out: - *lenp = len; - *ppos += len; - return 0; + + return rc; } static struct ctl_table topology_ctl_table[] = { -- cgit v1.2.3-59-g8ed1b From 71e33a1dd712dc6f68de733218d4506bea26bda6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Jul 2018 15:56:28 +0200 Subject: s390/cmm: split and simplify cmm pages proc handler Split cmm_pages_handler into cmm_pages_handler and cmm_timed_pages_handler, each handling separate proc entry. And reuse proc_doulongvec_minmax to simplify proc handlers. Min/max values are optional and are omitted here. 
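As a rough, self-contained sketch of the pattern used here (point a temporary ctl_table at a stack variable and let the generic helper do all of the user-space copying and parsing), consider the following hypothetical handler; my_get_pages() and my_set_pages() are placeholders standing in for cmm_get_pages() and cmm_set_pages(), not code from the patch:

#include <linux/sysctl.h>

/* Placeholder state and accessors; the patch uses cmm_get_pages()/cmm_set_pages(). */
static long my_page_count;
static long my_get_pages(void) { return my_page_count; }
static void my_set_pages(long nr) { my_page_count = nr; }

static int my_pages_handler(struct ctl_table *ctl, int write,
                            void __user *buffer, size_t *lenp, loff_t *ppos)
{
        long nr = my_get_pages();
        /* Shadow entry: same procname, but .data points at the local copy. */
        struct ctl_table ctl_entry = {
                .procname = ctl->procname,
                .data     = &nr,
                .maxlen   = sizeof(long),
        };
        int rc;

        /* proc_doulongvec_minmax() formats on read and parses on write;
         * with .extra1/.extra2 left unset no min/max check is applied. */
        rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
        if (rc < 0 || !write)
                return rc;

        my_set_pages(nr);       /* only reached after a successful write */
        return 0;
}

On a read the helper has already copied the formatted current value to user space before the !write early return; on a write the parsed value is applied only when parsing succeeded, which is the shape of both cmm handlers in the diff below.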
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 67 ++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 35 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 6cf024eb2085..50d8d1c887a4 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -251,45 +251,42 @@ static int cmm_skip_blanks(char *cp, char **endp) return str != cp; } -static struct ctl_table cmm_table[]; - static int cmm_pages_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - char buf[16], *p; - unsigned int len; - long nr; + long nr = cmm_get_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - cmm_skip_blanks(buf, &p); - nr = simple_strtoul(p, &p, 0); - if (ctl == &cmm_table[0]) - cmm_set_pages(nr); - else - cmm_add_timed_pages(nr); - } else { - if (ctl == &cmm_table[0]) - nr = cmm_get_pages(); - else - nr = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", nr); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - } - *lenp = len; - *ppos += len; + cmm_set_pages(nr); + return 0; +} + +static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + long nr = cmm_get_timed_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; + + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + + cmm_add_timed_pages(nr); return 0; } @@ -338,7 +335,7 @@ static struct ctl_table cmm_table[] = { { .procname = "cmm_timed_pages", .mode = 0644, - .proc_handler = cmm_pages_handler, + .proc_handler = cmm_timed_pages_handler, }, { .procname = "cmm_timeout", -- cgit v1.2.3-59-g8ed1b From 6c8021503f728b4b3eda69876c20e1ff30767a22 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Jul 2018 16:59:44 +0200 Subject: s390/appldata: reuse generic proc handler functions Simplify appldata proc handlers by reusing generic proc handler functions. 
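The appldata handlers follow the same shadow-ctl_table shape as the cmm sketch above, but use the bounded helper so that only 0 and 1 are accepted. Roughly, with my_feature_active as a hypothetical flag and any locking around the apply step omitted:

#include <linux/sysctl.h>

static int my_feature_active;   /* hypothetical on/off state */

static int my_toggle_handler(struct ctl_table *ctl, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int active = my_feature_active;
        int zero = 0, one = 1;
        struct ctl_table ctl_entry = {
                .procname = ctl->procname,
                .data     = &active,
                .maxlen   = sizeof(int),
                .extra1   = &zero,      /* smallest accepted value */
                .extra2   = &one,       /* largest accepted value */
        };
        int rc;

        rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
        if (rc < 0 || !write)
                return rc;
        my_feature_active = active;     /* out-of-range writes were rejected above */
        return 0;
}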
Tested-by: Gerald Schaefer Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/appldata/appldata_base.c | 121 ++++++++++++++----------------------- 1 file changed, 45 insertions(+), 76 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 25c1399b0787..9bf8489df6e6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -206,34 +206,28 @@ static int appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - char buf[2]; + int timer_active = appldata_timer_active; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &timer_active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - memcpy(buf, appldata_timer_active ? "1\n" : "0\n", ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; spin_lock(&appldata_timer_lock); - if (buf[0] == '1') + if (timer_active) __appldata_vtimer_setup(APPLDATA_ADD_TIMER); - else if (buf[0] == '0') + else __appldata_vtimer_setup(APPLDATA_DEL_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -247,37 +241,24 @@ static int appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - int interval; - char buf[16]; + int interval = appldata_interval; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &interval, + .maxlen = sizeof(int), + .extra1 = &one, + }; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - len = sprintf(buf, "%i\n", appldata_interval); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? 
sizeof(buf) : len)) - return -EFAULT; - interval = 0; - sscanf(buf, "%i", &interval); - if (interval <= 0) - return -EINVAL; + rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; spin_lock(&appldata_timer_lock); appldata_interval = interval; __appldata_vtimer_setup(APPLDATA_MOD_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -292,10 +273,17 @@ appldata_generic_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; - unsigned int len; - int rc, found; - char buf[2]; struct list_head *lh; + int rc, found; + int active; + int zero = 0; + int one = 1; + struct ctl_table ctl_entry = { + .data = &active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; found = 0; mutex_lock(&appldata_ops_mutex); @@ -316,31 +304,15 @@ appldata_generic_handler(struct ctl_table *ctl, int write, } mutex_unlock(&appldata_ops_mutex); - if (!*lenp || *ppos) { - *lenp = 0; + active = ops->active; + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) { module_put(ops->owner); - return 0; - } - if (!write) { - memcpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) { - module_put(ops->owner); - return -EFAULT; - } - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) { - module_put(ops->owner); - return -EFAULT; + return rc; } mutex_lock(&appldata_ops_mutex); - if ((buf[0] == '1') && (ops->active == 0)) { + if (active && (ops->active == 0)) { // protect work queue callback if (!try_module_get(ops->owner)) { mutex_unlock(&appldata_ops_mutex); @@ -358,7 +330,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } else ops->active = 1; - } else if ((buf[0] == '0') && (ops->active == 1)) { + } else if (!active && (ops->active == 1)) { ops->active = 0; rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, (unsigned long) ops->data, ops->size, @@ -369,9 +341,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } mutex_unlock(&appldata_ops_mutex); -out: - *lenp = len; - *ppos += len; module_put(ops->owner); return 0; } -- cgit v1.2.3-59-g8ed1b From ac4c4fc87d6beb6dd8287f24670e5e81c24b1de7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Jul 2018 08:13:13 +0200 Subject: s390/cmm: avoid add_timer on concurrently used timer cmm_set_timer could be called concurrently from cmm_thread, cmm proc handler, upon cmm smsg receive and timer function itself. To avoid potential race condition and hitting BUG_ON in add_timer on already pending timer simply reuse mod_timer which is according to documentation "the only safe way to modify the timeout" with multiple unserialized concurrent users. mod_timer can handle both active and inactive timers which allows to carry out minor code simplification as well. 
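The difference can be illustrated with a small, self-contained timer sketch (my_timer and my_timeout_seconds are placeholders, not code from the patch): add_timer() contains a BUG_ON for already-pending timers, so the old shape had to test timer_pending() first and could still race, while mod_timer() atomically re-arms a timer whether or not it is currently pending.

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_timer;              /* placeholder timer */
static unsigned long my_timeout_seconds = 2;    /* placeholder timeout */

static void my_timer_fn(struct timer_list *unused)
{
        /* periodic work would go here */
}

static void my_timer_init(void)
{
        timer_setup(&my_timer, my_timer_fn, 0);
}

/* Racy shape (simplified): another CPU may arm the timer between the
 * timer_pending() test and add_timer(), and add_timer() on a pending
 * timer triggers its BUG_ON. */
static void my_set_timer_racy(void)
{
        if (timer_pending(&my_timer)) {
                if (mod_timer(&my_timer, jiffies + my_timeout_seconds * HZ))
                        return;
        }
        my_timer.expires = jiffies + my_timeout_seconds * HZ;
        add_timer(&my_timer);
}

/* Safe shape: mod_timer() handles both pending and inactive timers. */
static void my_set_timer(void)
{
        mod_timer(&my_timer, jiffies + my_timeout_seconds * HZ);
}

Since mod_timer() is the documented way to update the timeout of a timer with unserialized concurrent users, the whole conditional collapses into the single call that the patch below keeps.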
Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 50d8d1c887a4..510a18299196 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -191,12 +191,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - if (timer_pending(&cmm_timer)) { - if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) - return; - } - cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; - add_timer(&cmm_timer); + mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); } static void cmm_timer_fn(struct timer_list *unused) -- cgit v1.2.3-59-g8ed1b From 306d6c49ac9ded11114cb53b0925da52f2c2ada1 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 16 Jul 2018 10:38:57 +0200 Subject: s390/kvm: fix deadlock when killed by oom When the oom killer kills a userspace process in the page fault handler while in guest context, the fault handler fails to release the mm_sem if the FAULT_FLAG_RETRY_NOWAIT option is set. This leads to a deadlock when tearing down the mm when the process terminates. This bug can only happen when pfault is enabled, so only KVM clients are affected. The problem arises in the rare cases in which handle_mm_fault does not release the mm_sem. This patch fixes the issue by manually releasing the mm_sem when needed. Fixes: 24eb3a824c4f3 ("KVM: s390: Add FAULT_FLAG_RETRY_NOWAIT for guest fault") Cc: # 3.15+ Signed-off-by: Claudio Imbrenda Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e074480d3598..4cc3f06b0ab3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -502,6 +502,8 @@ retry: /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; + if (flags & FAULT_FLAG_RETRY_NOWAIT) + goto out_up; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) -- cgit v1.2.3-59-g8ed1b From ccaabeea02026e1fbf9274800e43d9135914cd72 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Jun 2018 14:25:59 +0200 Subject: s390/chsc: fix packed-not-aligned warnings Remove attribute packed where possible failing this add proper alignment information to fix warnings like the one below: drivers/s390/cio/chsc.c: In function 'chsc_siosl': drivers/s390/cio/chsc.c:1287:2: warning: alignment 1 of 'struct ' is less than 4 [-Wpacked-not-aligned] } __attribute__ ((packed)) *siosl_area; Note: this patch should be a nop since non of these structs use auto storage but allocated pages. However there are changes to the generated code because of additional padding at the end of some of the structs due to alignment when memset(foo, 0, sizeof(*foo)) is used. 
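What gcc 8 complains about, and the two ways the patch deals with it, can be sketched with illustrative types (these are not the real chsc structures): the warning fires when a packed structure, whose alignment drops to 1, contains a member whose type explicitly requires a larger alignment, so the fix is either to drop packed where the layout is already tight or to keep packed and state the intended alignment.

#include <linux/types.h>
#include <linux/compiler.h>

/* Member type that demands 4-byte alignment, similar to struct subchannel_id. */
struct my_id {
        u32 bits;
} __attribute__ ((packed, aligned(4)));

/* With gcc 8 and -Wpacked-not-aligned this warns along the lines of
 * "alignment 1 of 'struct my_req' is less than 4". */
struct my_req {
        struct my_id id;
        u32 word[11];
} __attribute__ ((packed));

/* Fix variant 1: drop packed where the members leave no implicit padding. */
struct my_req_plain {
        struct my_id id;
        u32 word[11];
};

/* Fix variant 2: keep packed but make the intended alignment explicit,
 * as the patch does for the page-sized areas (__packed __aligned(PAGE_SIZE)). */
struct my_req_aligned {
        struct my_id id;
        u32 word[11];
} __packed __aligned(4);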
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/chsc.h | 10 +++++----- drivers/s390/cio/chsc.c | 18 +++++++++--------- drivers/s390/cio/chsc.h | 18 +++++++++--------- 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index dc329aa03f76..83a574e95b3a 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -23,29 +23,29 @@ struct chsc_async_header { __u32 key : 4; __u32 : 28; struct subchannel_id sid; -} __attribute__ ((packed)); +}; struct chsc_async_area { struct chsc_async_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; -} __attribute__ ((packed)); +}; struct chsc_header { __u16 length; __u16 code; -} __attribute__ ((packed)); +}; struct chsc_sync_area { struct chsc_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; -} __attribute__ ((packed)); +}; struct chsc_response_struct { __u16 length; __u16 code; __u32 parms; __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)]; -} __attribute__ ((packed)); +}; struct chsc_chp_cd { struct chp_id chpid; diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 9029804dcd22..a0baee25134c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -91,7 +91,7 @@ struct chsc_ssd_area { u16 sch; /* subchannel */ u8 chpid[8]; /* chpids 0-7 */ u16 fla[8]; /* full link addresses 0-7 */ -} __attribute__ ((packed)); +} __packed __aligned(PAGE_SIZE); int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) { @@ -319,7 +319,7 @@ struct chsc_sei { struct chsc_sei_nt2_area nt2_area; u8 nt_area[PAGE_SIZE - 24]; } u; -} __packed; +} __packed __aligned(PAGE_SIZE); /* * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands" @@ -841,7 +841,7 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable) u32 : 4; u32 fmt : 4; u32 : 16; - } __attribute__ ((packed)) *secm_area; + } *secm_area; unsigned long flags; int ret, ccode; @@ -1014,7 +1014,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) u32 cmg : 8; u32 zeroes3; u32 data[NR_MEASUREMENT_CHARS]; - } __attribute__ ((packed)) *scmc_area; + } *scmc_area; chp->shared = -1; chp->cmg = -1; @@ -1142,7 +1142,7 @@ int __init chsc_get_cssid(int idx) u8 cssid; u32 : 24; } list[0]; - } __packed *sdcal_area; + } *sdcal_area; int ret; spin_lock_irq(&chsc_page_lock); @@ -1192,7 +1192,7 @@ chsc_determine_css_characteristics(void) u32 reserved4; u32 general_char[510]; u32 chsc_char[508]; - } __attribute__ ((packed)) *scsc_area; + } *scsc_area; spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); @@ -1236,7 +1236,7 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta) unsigned int rsvd3[3]; u64 clock_delta; unsigned int rsvd4[2]; - } __attribute__ ((packed)) *rr; + } *rr; int rc; memset(page, 0, PAGE_SIZE); @@ -1261,7 +1261,7 @@ int chsc_sstpi(void *page, void *result, size_t size) unsigned int rsvd0[3]; struct chsc_header response; char data[]; - } __attribute__ ((packed)) *rr; + } *rr; int rc; memset(page, 0, PAGE_SIZE); @@ -1284,7 +1284,7 @@ int chsc_siosl(struct subchannel_id schid) u32 word3; struct chsc_header response; u32 word[11]; - } __attribute__ ((packed)) *siosl_area; + } *siosl_area; unsigned long flags; int ccode; int rc; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 5c9f0dd33f4e..78aba8d94eec 100644 --- a/drivers/s390/cio/chsc.h +++ 
b/drivers/s390/cio/chsc.h @@ -15,12 +15,12 @@ #define NR_MEASUREMENT_CHARS 5 struct cmg_chars { u32 values[NR_MEASUREMENT_CHARS]; -} __attribute__ ((packed)); +}; #define NR_MEASUREMENT_ENTRIES 8 struct cmg_entry { u32 values[NR_MEASUREMENT_ENTRIES]; -} __attribute__ ((packed)); +}; struct channel_path_desc_fmt1 { u8 flags; @@ -38,7 +38,7 @@ struct channel_path_desc_fmt1 { u8 s:1; u8 f:1; u32 zeros[2]; -} __attribute__ ((packed)); +}; struct channel_path_desc_fmt3 { struct channel_path_desc_fmt1 fmt1_desc; @@ -59,7 +59,7 @@ struct css_chsc_char { u32:7; u32 pnso:1; /* bit 116 */ u32:11; -}__attribute__((packed)); +} __packed; extern struct css_chsc_char css_chsc_characteristics; @@ -82,7 +82,7 @@ struct chsc_ssqd_area { struct chsc_header response; u32:32; struct qdio_ssqd_desc qdio_ssqd; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_scssc_area { struct chsc_header request; @@ -102,7 +102,7 @@ struct chsc_scssc_area { u32 reserved[1004]; struct chsc_header response; u32:32; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_scpd { struct chsc_header request; @@ -120,7 +120,7 @@ struct chsc_scpd { struct chsc_header response; u32:32; u8 data[0]; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_sda_area { struct chsc_header request; @@ -199,7 +199,7 @@ struct chsc_scm_info { u32 reserved2[10]; u64 restok; struct sale scmal[248]; -} __packed; +} __packed __aligned(PAGE_SIZE); int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token); @@ -243,7 +243,7 @@ struct chsc_pnso_area { struct qdio_brinfo_entry_l3_ipv4 l3_ipv4[0]; struct qdio_brinfo_entry_l2 l2[0]; } entries; -} __packed; +} __packed __aligned(PAGE_SIZE); int chsc_pnso_brinfo(struct subchannel_id schid, struct chsc_pnso_area *brinfo_area, -- cgit v1.2.3-59-g8ed1b From aeaf7002a76c8da60c0f503badcbddc07650678c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 May 2018 15:14:48 +0200 Subject: s390: detect etoken facility Detect and report the etoken facility. With spectre_v2=auto or CONFIG_EXPOLINE_AUTO=y automatically disable expolines and use the full branch prediction mode for the kernel. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nospec-branch.c | 12 +++++++++++- arch/s390/kernel/nospec-sysfs.c | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 18ae7b9c71d6..bdddaae96559 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -35,6 +35,8 @@ early_param("nospec", nospec_setup_early); static int __init nospec_report(void) { + if (test_facility(156)) + pr_info("Spectre V2 mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) @@ -56,7 +58,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (IS_ENABLED(CC_USING_EXPOLINE)) { + if (test_facility(156)) { + /* + * The machine supports etokens. + * Disable expolines and disable nobp. + */ + if (IS_ENABLED(CC_USING_EXPOLINE)) + nospec_disable = 1; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } else if (IS_ENABLED(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. 
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 8affad5f18cb..e30e580ae362 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -13,6 +13,8 @@ ssize_t cpu_show_spectre_v1(struct device *dev, ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + if (test_facility(156)) + return sprintf(buf, "Mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) return sprintf(buf, "Mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) -- cgit v1.2.3-59-g8ed1b From 9fa6a659ca98c45a5c226d165adfed96a1e01564 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 19 Jul 2018 00:08:30 -0500 Subject: s390/hypfs: Replace PTR_RET with PTR_ERR_OR_ZERO PTR_RET is deprecated, use PTR_ERR_OR_ZERO instead. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index a2945b289a29..3452e18bb1ca 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -497,7 +497,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) @@ -544,7 +544,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) -- cgit v1.2.3-59-g8ed1b From ef4b891f7924aca43147928532ea729f290ca38e Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sun, 22 Jul 2018 19:11:09 +0530 Subject: s390: vdso_fault return type Use new return type vm_fault_t for fault handler vdso_fault. 
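The conversion is type-only: vm_fault_t exists to carry the VM_FAULT_* codes, so the handler keeps its logic and only changes its declared return type. A hedged sketch of a special-mapping fault handler in the new style, where my_pages is a placeholder page array and not part of the patch:

#include <linux/mm.h>
#include <linux/mm_types.h>

static struct page **my_pages;  /* placeholder, assumed to be filled at init time */

static vm_fault_t my_special_fault(const struct vm_special_mapping *sm,
                                   struct vm_area_struct *vma,
                                   struct vm_fault *vmf)
{
        if (!my_pages || vmf->pgoff > 0)
                return VM_FAULT_SIGBUS;         /* error codes are VM_FAULT_* bits */

        get_page(my_pages[0]);                  /* reference handed to the fault path */
        vmf->page = my_pages[0];
        return 0;                               /* 0 means: map the page in vmf->page */
}

Returning 0 with vmf->page set lets the generic fault code map the page, while error conditions travel through the VM_FAULT_* bits that vm_fault_t was introduced to hold.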
Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 09abae40f917..3031cc6dd0ab 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -47,7 +47,7 @@ static struct page **vdso64_pagelist; */ unsigned int __read_mostly vdso_enabled = 1; -static int vdso_fault(const struct vm_special_mapping *sm, +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct page **vdso_pagelist; -- cgit v1.2.3-59-g8ed1b From 2a6777a118406858ad02bce8e4dc8302cba5a290 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Jul 2018 07:55:18 +0200 Subject: s390: disable gcc plugins The s390 build currently fails with the latent entropy plugin: arch/s390/kernel/als.o: In function `verify_facilities': als.c:(.init.text+0x24): undefined reference to `latent_entropy' als.c:(.init.text+0xae): undefined reference to `latent_entropy' make[3]: *** [arch/s390/boot/compressed/vmlinux] Error 1 make[2]: *** [arch/s390/boot/compressed/vmlinux] Error 2 make[1]: *** [bzImage] Error 2 This will be fixed with the early boot rework from Vasily, which is planned for the 4.19 merge window. For 4.18 the simplest solution is to disable the gcc plugins and reenable them after the early boot rework is upstream. Reported-by: Guenter Roeck Signed-off-by: Martin Schwidefsky (cherry picked from commit 2fba3573f1cf876ad94992c256c5c410039e60b4) --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3023fa00e4b5..f9b6036f1c16 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,7 +140,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_GCC_PLUGINS + select HAVE_GCC_PLUGINS if BROKEN select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 -- cgit v1.2.3-59-g8ed1b From 6eedfaac712d7a1ebcfa34085fbae91f19d33bf3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Jul 2018 08:23:29 +0200 Subject: s390: reenable gcc plugins Now that the early boot rework is upstream we can enable the gcc plugins again. See git commit 72f108b308707f21499e0ac05bf7370360cf06d8 "s390: disable gcc plugins" for reference. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f9b6036f1c16..3023fa00e4b5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,7 +140,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_GCC_PLUGINS if BROKEN + select HAVE_GCC_PLUGINS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 -- cgit v1.2.3-59-g8ed1b From 5a045bb9c44caebcf4e88bf78343166596c0014b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:16 +0100 Subject: s390/mm: Make gmap_protect_range more modular This patch reworks the gmap_protect_range logic and extracts the pte handling into an own function. Also we do now walk to the pmd and make it accessible in the function for later use. This way we can add huge page handling logic more easily. 
Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Reviewed-by: Martin Schwidefsky --- arch/s390/mm/gmap.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 83 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bc56ec8abcf7..71b0e9ca0137 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -864,7 +864,79 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, */ static void gmap_pte_op_end(spinlock_t *ptl) { - spin_unlock(ptl); + if (ptl) + spin_unlock(ptl); +} + +/** + * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock + * and return the pmd pointer + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * + * Returns a pointer to the pmd for a guest address, or NULL + */ +static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) +{ + pmd_t *pmdp; + + BUG_ON(gmap_is_shadow(gmap)); + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); + + if (!pmdp || pmd_none(*pmdp)) { + spin_unlock(&gmap->guest_table_lock); + return NULL; + } + + /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ + if (!pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); + return pmdp; +} + +/** + * gmap_pmd_op_end - release the guest_table_lock if needed + * @gmap: pointer to the guest mapping meta data structure + * @pmdp: pointer to the pmd + */ +static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) +{ + if (pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); +} + +/* + * gmap_protect_pte - remove access rights to memory and set pgste bits + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @pmdp: pointer to the pmd associated with the pte + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns 0 if successfully protected, -ENOMEM if out of memory and + * -EAGAIN if a fixup is needed. + * + * Expected to be called with sg->mm->mmap_sem in read + */ +static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int rc; + pte_t *ptep; + spinlock_t *ptl = NULL; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return -EAGAIN; + + ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl); + if (!ptep) + return -ENOMEM; + + /* Protect and unlock. 
*/ + rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); + gmap_pte_op_end(ptl); + return rc; } /* @@ -884,17 +956,21 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) { unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + pmd_t *pmdp; int rc; BUG_ON(gmap_is_shadow(gmap)); while (len) { rc = -EAGAIN; - ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); - if (ptep) { - rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); - gmap_pte_op_end(ptl); + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (pmdp) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + len -= PAGE_SIZE; + gaddr += PAGE_SIZE; + } + gmap_pmd_op_end(gmap, pmdp); } if (rc) { vmaddr = __gmap_translate(gmap, gaddr); @@ -903,10 +979,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); if (rc) return rc; - continue; } - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; } return 0; } -- cgit v1.2.3-59-g8ed1b From 2c46e974dd8b5316e65637af0ff6d4bc78554b2e Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:18 +0100 Subject: s390/mm: Abstract gmap notify bit setting Currently we use the software PGSTE bits PGSTE_IN_BIT and PGSTE_VSIE_BIT to notify before an invalidation occurs on a prefix page or a VSIE page respectively. Both bits are pgste specific, but are used when protecting a memory range. Let's introduce abstract GMAP_NOTIFY_* bits that will be realized into the respective bits when gmap DAT table entries are protected. Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Reviewed-by: David Hildenbrand --- arch/s390/include/asm/gmap.h | 4 ++++ arch/s390/mm/gmap.c | 11 +++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index e07cce88dfb0..c1bc5633fc6e 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -9,6 +9,10 @@ #ifndef _ASM_S390_GMAP_H #define _ASM_S390_GMAP_H +/* Generic bits for GMAP notification on DAT table entry changes. */ +#define GMAP_NOTIFY_SHADOW 0x2 +#define GMAP_NOTIFY_MPROT 0x1 + /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 71b0e9ca0137..0bada5e097cb 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -912,7 +912,7 @@ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) * @gaddr: virtual address in the guest address space * @pmdp: pointer to the pmd associated with the pte * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE - * @bits: pgste notification bits to set + * @bits: notification bits to set * * Returns 0 if successfully protected, -ENOMEM if out of memory and * -EAGAIN if a fixup is needed. @@ -925,6 +925,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, int rc; pte_t *ptep; spinlock_t *ptl = NULL; + unsigned long pbits = 0; if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) return -EAGAIN; @@ -933,8 +934,10 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, if (!ptep) return -ENOMEM; + pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0; + pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0; /* Protect and unlock. 
*/ - rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); + rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits); gmap_pte_op_end(ptl); return rc; } @@ -1008,7 +1011,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; down_read(&gmap->mm->mmap_sem); - rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT); + rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); up_read(&gmap->mm->mmap_sem); return rc; } @@ -1599,7 +1602,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, down_read(&parent->mm->mmap_sem); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, PGSTE_VSIE_BIT); + PROT_READ, GMAP_NOTIFY_SHADOW); up_read(&parent->mm->mmap_sem); spin_lock(&parent->shadow_lock); new->initialized = true; -- cgit v1.2.3-59-g8ed1b From 58b7e200d2f1f9af9ef69a401a877791837a1c87 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:20 +0100 Subject: s390/mm: Add gmap pmd linking Let's allow pmds to be linked into gmap for the upcoming s390 KVM huge page support. Before this patch we copied the full userspace pmd entry. This is not correct, as it contains SW defined bits that might be interpreted differently in the GMAP context. Now we only copy over all hardware relevant information leaving out the software bits. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand --- arch/s390/include/asm/pgtable.h | 6 ++++-- arch/s390/mm/gmap.c | 13 +++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ab636089c60..fe36b3bb2afd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr) #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ -#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL +#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */ diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 0bada5e097cb..870e81fcb0cf 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -596,10 +596,15 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; + if (!rc) { + if (pmd_large(*pmd)) { + *table = pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS_LARGE; + } else + *table = pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS; + } + } spin_unlock(&gmap->guest_table_lock); spin_unlock(ptl); radix_tree_preload_end(); -- cgit v1.2.3-59-g8ed1b From 7c4b13a7c042fd6b71dc48d291208f8a97fad333 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:21 +0100 Subject: s390/mm: Add gmap pmd notification bit setting Like for ptes, we also need invalidation notification for pmds, to make sure the guest lowcore pages are always accessible and later 
addition of shadowed pmds. With PMDs we do not have PGSTEs or some other bits we could use in the host PMD. Instead we pick one of the free bits in the gmap PMD. Every time a host pmd will be invalidated, we will check if the respective gmap PMD has the bit set and in that case fire up the notifier. Signed-off-by: Janosch Frank --- arch/s390/include/asm/gmap.h | 3 +++ arch/s390/mm/gmap.c | 60 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index c1bc5633fc6e..276268b48aff 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -13,6 +13,9 @@ #define GMAP_NOTIFY_SHADOW 0x2 #define GMAP_NOTIFY_MPROT 0x1 +/* Status bits only for huge segment entries */ +#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ + /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 870e81fcb0cf..96dd94d51ad4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -911,6 +911,40 @@ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) spin_unlock(&gmap->guest_table_lock); } +/* + * gmap_protect_pmd - remove access rights to memory and set pmd notification bits + * @pmdp: pointer to the pmd to be protected + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns: + * 0 if successfully protected + * -EAGAIN if a fixup is needed + * -EINVAL if unsupported notifier bits have been specified + * + * Expected to be called with sg->mm->mmap_sem in read and + * guest_table_lock held. + */ +static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; + int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; + + /* Fixup needed */ + if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) + return -EAGAIN; + + if (bits & GMAP_NOTIFY_MPROT) + pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + + /* Shadow GMAP protection needs split PMDs */ + if (bits & GMAP_NOTIFY_SHADOW) + return -EINVAL; + + return 0; +} + /* * gmap_protect_pte - remove access rights to memory and set pgste bits * @gmap: pointer to guest mapping meta data structure @@ -963,7 +997,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) { - unsigned long vmaddr; + unsigned long vmaddr, dist; pmd_t *pmdp; int rc; @@ -972,15 +1006,29 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, rc = -EAGAIN; pmdp = gmap_pmd_op_walk(gmap, gaddr); if (pmdp) { - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - len -= PAGE_SIZE; - gaddr += PAGE_SIZE; + if (!pmd_large(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + len -= PAGE_SIZE; + gaddr += PAGE_SIZE; + } + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); + len = len < dist ? 
0 : len - dist; + gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; + } } gmap_pmd_op_end(gmap, pmdp); } if (rc) { + if (rc == -EINVAL) + return rc; + + /* -EAGAIN, fixup of userspace mm and gmap */ vmaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; -- cgit v1.2.3-59-g8ed1b From 6a3762778d1ba1a58ab473124790cd612d10eadc Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:22 +0100 Subject: s390/mm: Add gmap pmd invalidation and clearing If the host invalidates a pmd, we also have to invalidate the corresponding gmap pmds, as well as flush them from the TLB. This is necessary, as we don't share the pmd tables between host and guest as we do with ptes. The clearing part of these three new functions sets a guest pmd entry to _SEGMENT_ENTRY_EMPTY, so the guest will fault on it and we will re-link it. Flushing the gmap is not necessary in the host's lazy local and csp cases. Both purge the TLB completely. Signed-off-by: Janosch Frank Reviewed-by: Martin Schwidefsky Acked-by: David Hildenbrand --- arch/s390/include/asm/pgtable.h | 4 ++ arch/s390/mm/gmap.c | 125 ++++++++++++++++++++++++++++++++++++++++ arch/s390/mm/pgtable.c | 17 +++++- 3 files changed, 143 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fe36b3bb2afd..087e0282f165 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1118,6 +1118,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr, int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste); +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 96dd94d51ad4..87c174ee3a8c 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2221,6 +2221,131 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, } EXPORT_SYMBOL_GPL(ptep_notify); +static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); +} + +static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, + int purge) +{ + pmd_t *pmdp; + struct gmap *gmap; + unsigned long gaddr; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (pmdp) { + gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(pmd_val(*pmdp) & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE); + if (purge) + __pmdp_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} + +/** + * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without + * flushing + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 0); +} 
+EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate); + +/** + * gmap_pmdp_csp - csp all affected guest pmd entries + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 1); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_csp); + +/** + * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_LOCAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); + +/** + * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global); + static inline void thp_split_mm(struct mm_struct *mm) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 301e466e4263..fe84c0715395 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm, mm->context.asce, IDTE_LOCAL); else __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); + if (mm_has_pgste(mm)) + gmap_pmdp_idte_local(mm, addr); } static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (MACHINE_HAS_TLB_GUEST) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + if (mm_has_pgste(mm)) + gmap_pmdp_idte_global(mm, addr); + } else if (MACHINE_HAS_IDTE) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); - else + if (mm_has_pgste(mm)) + gmap_pmdp_idte_global(mm, addr); + } else { __pmdp_csp(pmdp); + if (mm_has_pgste(mm)) + gmap_pmdp_csp(mm, addr); + } } static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, @@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct 
mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + gmap_pmdp_invalidate(mm, addr); } else { pmdp_idte_global(mm, addr, pmdp); } -- cgit v1.2.3-59-g8ed1b From 0959e168678d2d95648317e1e5e46bcb358272eb Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 17 Jul 2018 13:21:22 +0100 Subject: s390/mm: Add huge page dirty sync support To do dirty loging with huge pages, we protect huge pmds in the gmap. When they are written to, we unprotect them and mark them dirty. We introduce the function gmap_test_and_clear_dirty_pmd which handles dirty sync for huge pages. Signed-off-by: Janosch Frank Acked-by: David Hildenbrand --- arch/s390/include/asm/gmap.h | 3 + arch/s390/include/asm/pgtable.h | 3 +- arch/s390/kvm/kvm-s390.c | 25 +++++--- arch/s390/mm/gmap.c | 127 ++++++++++++++++++++++++++++++++++++++-- arch/s390/mm/pgtable.c | 34 +---------- 5 files changed, 148 insertions(+), 44 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 276268b48aff..fcbd638fb9f4 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -15,6 +15,7 @@ /* Status bits only for huge segment entries */ #define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ /** * struct gmap_struct - guest address space @@ -139,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], + unsigned long gaddr, unsigned long vmaddr); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 087e0282f165..0e7cb0dc9c33 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1103,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *sptep, pte_t *tptep, pte_t pte); void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep); -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address, + pte_t *ptep); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3b7a5151b6a5..4cff5e31ca36 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -511,19 +511,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) } static void kvm_s390_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot) { + int i; gfn_t cur_gfn, last_gfn; - unsigned long address; + unsigned long gaddr, vmaddr; struct gmap *gmap = kvm->arch.gmap; + DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); - /* Loop over all guest pages */ + /* Loop over all guest segments */ + cur_gfn = memslot->base_gfn; last_gfn = memslot->base_gfn + memslot->npages; - for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { - address = gfn_to_hva_memslot(memslot, cur_gfn); + for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { + gaddr = gfn_to_gpa(cur_gfn); + vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); + if (kvm_is_error_hva(vmaddr)) + continue; + + bitmap_zero(bitmap, _PAGE_ENTRIES); 
+ gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); + for (i = 0; i < _PAGE_ENTRIES; i++) { + if (test_bit(i, bitmap)) + mark_page_dirty(kvm, cur_gfn + i); + } - if (test_and_clear_guest_dirty(gmap->mm, address)) - mark_page_dirty(kvm, cur_gfn); if (fatal_signal_pending(current)) return; cond_resched(); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 87c174ee3a8c..3807de25a706 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -521,6 +521,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table, rcu_read_unlock(); } +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, + unsigned long gaddr); + /** * gmap_link - set up shadow page tables to connect a host to a guest address * @gmap: pointer to guest mapping meta data structure @@ -541,6 +544,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) p4d_t *p4d; pud_t *pud; pmd_t *pmd; + u64 unprot; int rc; BUG_ON(gmap_is_shadow(gmap)); @@ -598,12 +602,19 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) vmaddr >> PMD_SHIFT, table); if (!rc) { if (pmd_large(*pmd)) { - *table = pmd_val(*pmd) & - _SEGMENT_ENTRY_HARDWARE_BITS_LARGE; + *table = (pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) + | _SEGMENT_ENTRY_GMAP_UC; } else *table = pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS; } + } else if (*table & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) { + unprot = (u64)*table; + unprot &= ~_SEGMENT_ENTRY_PROTECT; + unprot |= _SEGMENT_ENTRY_GMAP_UC; + gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr); } spin_unlock(&gmap->guest_table_lock); spin_unlock(ptl); @@ -930,11 +941,23 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, { int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; + pmd_t new = *pmdp; /* Fixup needed */ if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) return -EAGAIN; + if (prot == PROT_NONE && !pmd_i) { + pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (prot == PROT_READ && !pmd_p) { + pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; + pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + if (bits & GMAP_NOTIFY_MPROT) pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; @@ -2228,6 +2251,32 @@ static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); } +/** + * gmap_pmdp_xchg - exchange a gmap pmd with another + * @gmap: pointer to the guest address space structure + * @pmdp: pointer to the pmd entry + * @new: replacement entry + * @gaddr: the affected guest address + * + * This function is assumed to be called with the guest_table_lock + * held. 
+ */ +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, + unsigned long gaddr) +{ + gaddr &= HPAGE_MASK; + pmdp_notify_gmap(gmap, pmdp, gaddr); + pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, + IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *pmdp = new; +} + static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, int purge) { @@ -2243,7 +2292,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, if (pmdp) { gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(pmd_val(*pmdp) & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); if (purge) __pmdp_csp(pmdp); pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; @@ -2296,7 +2346,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) pmdp = (pmd_t *)entry; gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); @@ -2330,7 +2381,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) pmdp = (pmd_t *)entry; gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~_SEGMENT_ENTRY_HARDWARE_BITS_LARGE); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); @@ -2346,6 +2398,71 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) } EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global); +/** + * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status + * @gmap: pointer to guest address space + * @pmdp: pointer to the pmd to be tested + * @gaddr: virtual address in the guest address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return false; + + /* Already protected memory, which did not change is clean */ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC)) + return false; + + /* Clear UC indication and reset protection */ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); + return true; +} + +/** + * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment + * @gmap: pointer to guest address space + * @bitmap: dirty bitmap for this pmd + * @gaddr: virtual address in the guest address space + * @vmaddr: virtual address in the host address space + * + * This function is assumed to be called with the guest_table_lock + * held. 
+ */ +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], + unsigned long gaddr, unsigned long vmaddr) +{ + int i; + pmd_t *pmdp; + pte_t *ptep; + spinlock_t *ptl; + + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return; + + if (pmd_large(*pmdp)) { + if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr)) + bitmap_fill(bitmap, _PAGE_ENTRIES); + } else { + for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) { + ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl); + if (!ptep) + continue; + if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep)) + set_bit(i, bitmap); + spin_unlock(ptl); + } + } + gmap_pmd_op_end(gmap, pmdp); +} +EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); + static inline void thp_split_mm(struct mm_struct *mm) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index fe84c0715395..684df964e345 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -704,40 +704,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* * Test and reset if a guest page is dirty */ -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - spinlock_t *ptl; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pgste_t pgste; - pte_t *ptep; pte_t pte; bool dirty; int nodat; - pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return false; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return false; - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return false; - /* We can't run guests backed by huge pages, but userspace can - * still set them up and then try to migrate them without any - * migration support. - */ - if (pmd_large(*pmd)) - return true; - - ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (unlikely(!ptep)) - return false; - pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); pgste_val(pgste) &= ~PGSTE_UC_BIT; @@ -753,11 +727,9 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) *ptep = pte; } pgste_set_unlock(ptep, pgste); - - spin_unlock(ptl); return dirty; } -EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); +EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq) -- cgit v1.2.3-59-g8ed1b From 964c2c05c9f3095a18387a57b289cf06de637521 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 13 Jul 2018 11:28:25 +0100 Subject: s390/mm: Clear huge page storage keys on enable_skey When a guest starts using storage keys, we trap and set a default one for its whole valid address space. With this patch we are now able to do that for large pages. To speed up the storage key insertion, we use __storage_key_init_range, which in-turn will use sske_frame to set multiple storage keys with one instruction. As it has been previously used for debuging we have to get rid of the default key check and make it quiescing. 
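For illustration, the effect of the new hugetlb walker on a single 1 MB segment is roughly the sketch below; it is a simplification with a hypothetical helper name and a page-by-page loop, whereas the patch calls __storage_key_init_range() so that sske_frame can set keys for a whole 1 MB frame in one instruction:

	/*
	 * Illustration only: give every 4 KB page of one writable, valid
	 * 1 MB segment the default storage key, using the quiescing
	 * variant of page_set_storage_key().
	 */
	static void sketch_init_segment_skeys(pmd_t pmd)
	{
		unsigned long addr = pmd_val(pmd) & HPAGE_MASK;
		unsigned long end = addr + HPAGE_SIZE;

		if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID ||
		    !(pmd_val(pmd) & _SEGMENT_ENTRY_WRITE))
			return;
		while (addr < end) {
			page_set_storage_key(addr, PAGE_DEFAULT_KEY, 1);
			addr += PAGE_SIZE;
		}
	}
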
Signed-off-by: Dominik Dingel Signed-off-by: Janosch Frank [replaced page_set_storage_key loop with __storage_key_init_range] Reviewed-by: Martin Schwidefsky Reviewed-by: David Hildenbrand --- arch/s390/mm/gmap.c | 32 +++++++++++++++++++++++++++++--- arch/s390/mm/pageattr.c | 6 ++---- 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 3807de25a706..a8e32e60226b 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2539,17 +2539,43 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); * Enable storage key handling from now on and initialize the storage * keys with the default key. */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) +static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) { /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; } +static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, + unsigned long hmask, unsigned long next, + struct mm_walk *walk) +{ + pmd_t *pmd = (pmd_t *)pte; + unsigned long start, end; + + /* + * The write check makes sure we do not set a key on shared + * memory. This is needed as the walker does not differentiate + * between actual guest memory and the process executable or + * shared libraries. + */ + if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID || + !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE)) + return 0; + + start = pmd_val(*pmd) & HPAGE_MASK; + end = start + HPAGE_SIZE - 1; + __storage_key_init_range(start, end); + return 0; +} + int s390_enable_skey(void) { - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_walk walk = { + .hugetlb_entry = __s390_enable_skey_hugetlb, + .pte_entry = __s390_enable_skey_pte, + }; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc = 0; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index c44171588d08..f8c6faab41f4 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -14,7 +14,7 @@ static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { - asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0" : [addr] "+a" (addr) : [skey] "d" (skey)); return addr; } @@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; - if (!PAGE_DEFAULT_KEY) - return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) continue; } } - page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); + page_set_storage_key(start, PAGE_DEFAULT_KEY, 1); start += PAGE_SIZE; } } -- cgit v1.2.3-59-g8ed1b From 3afdfca69870963ae01e280732a5ee493a2fcbb3 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:26 +0100 Subject: s390/mm: Clear skeys for newly mapped huge guest pmds Similarly to the pte skey handling, where we set the storage key to the default key for each newly mapped pte, we have to also do that for huge pmds. With the PG_arch_1 flag we keep track if the area has already been cleared of its skeys. 
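As a hedged sketch of the bookkeeping described above (simplified, with a hypothetical helper name; the real logic lives in clear_huge_pte_skeys() in the diff below), the PG_arch_1 test-and-set limits key initialization to one pass per huge page:

	/*
	 * Illustration only: initialize the storage keys of a huge page at
	 * most once, using PG_arch_1 as the "already cleared" marker.
	 */
	static void sketch_init_skeys_once(struct page *page,
					   unsigned long paddr, unsigned long size)
	{
		if (test_and_set_bit(PG_arch_1, &page->flags))
			return;	/* keys were already set for this page */
		__storage_key_init_range(paddr, paddr + size - 1);
	}
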
Signed-off-by: Janosch Frank Reviewed-by: Martin Schwidefsky --- arch/s390/include/asm/hugetlb.h | 5 ++++- arch/s390/mm/gmap.c | 2 ++ arch/s390/mm/hugetlbpage.c | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 9c5fc50204dd..2d1afa58a4b6 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -#define arch_clear_hugepage_flags(page) do { } while (0) +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_arch_1, &page->flags); +} static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a8e32e60226b..a6738c0c4499 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2553,6 +2553,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, { pmd_t *pmd = (pmd_t *)pte; unsigned long start, end; + struct page *page = pmd_page(*pmd); /* * The write check makes sure we do not set a key on shared @@ -2567,6 +2568,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, start = pmd_val(*pmd) & HPAGE_MASK; end = start + HPAGE_SIZE - 1; __storage_key_init_range(start, end); + set_bit(PG_arch_1, &page->flags); return 0; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e804090f4470..b0246c705a19 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste) return pte; } +static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) +{ + struct page *page; + unsigned long size, paddr; + + if (!mm_uses_skeys(mm) || + rste & _SEGMENT_ENTRY_INVALID) + return; + + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + page = pud_page(__pud(rste)); + size = PUD_SIZE; + paddr = rste & PUD_MASK; + } else { + page = pmd_page(__pmd(rste)); + size = PMD_SIZE; + paddr = rste & PMD_MASK; + } + + if (!test_and_set_bit(PG_arch_1, &page->flags)) + __storage_key_init_range(paddr, paddr + size - 1); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; else rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } -- cgit v1.2.3-59-g8ed1b From 637ff9efe5eab419ea7f2bd6f2cf50f3cb69e322 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:28 +0100 Subject: s390/mm: Add huge pmd storage key handling Storage keys for guests with huge page mappings have to be managed in hardware. There are no PGSTEs for PMDs that we could use to retain the guests's logical view of the key. 
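For reference, a minimal sketch of the address arithmetic the huge-pmd key paths below rely on; the helper name is hypothetical, and the real functions do this computation inline before calling page_set_storage_key(), page_get_storage_key() or page_reset_referenced():

	/*
	 * Illustration: physical address of the 4 KB block backing 'addr'
	 * when 'pmd' maps a 1 MB segment. The storage key instructions
	 * operate on this absolute address, not on a PGSTE.
	 */
	static unsigned long sketch_huge_paddr(pmd_t pmd, unsigned long addr)
	{
		return (pmd_val(pmd) & HPAGE_MASK) | (addr & ~HPAGE_MASK);
	}
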
Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand --- arch/s390/mm/pgtable.c | 108 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 98 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 684df964e345..37d68706f5aa 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -410,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + return pmd; +} + pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) { @@ -734,12 +752,36 @@ EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq) { - unsigned long keyul; + unsigned long keyul, paddr; spinlock_t *ptl; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + /* + * Huge pmds need quiescing operations, they are + * always mapped. + */ + page_set_storage_key(paddr, key, 1); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -750,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits, skey; + unsigned long bits, skey; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); + paddr = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(paddr); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, skey, !nq); + page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; } @@ -813,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key); int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + unsigned long paddr; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; int cc = 0; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + cc = page_reset_referenced(paddr); + spin_unlock(ptl); + return cc; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -826,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pgste_val(new) &= ~PGSTE_GR_BIT; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + paddr = 
pte_val(*ptep) & PAGE_MASK; + cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; } @@ -845,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { + unsigned long paddr; spinlock_t *ptl; pgste_t pgste; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + /* Not yet mapped memory has a zero key */ + spin_unlock(ptl); + *key = 0; + return 0; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + *key = page_get_storage_key(paddr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + paddr = pte_val(*ptep) & PAGE_MASK; if (!(pte_val(*ptep) & _PAGE_INVALID)) - *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + *key = page_get_storage_key(paddr); /* Reflect guest's logical view, not physical */ *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); -- cgit v1.2.3-59-g8ed1b From bd096f6443194e57382686a3ac5f2ce4e82b55d7 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 18 Jul 2018 13:40:22 +0100 Subject: KVM: s390: Add skey emulation fault handling When doing skey emulation for huge guests, we now need to fault in pmds, as we don't have PGSTES anymore to store them when we do not have valid table entries. Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 15 +++++-- arch/s390/kvm/priv.c | 105 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 83 insertions(+), 37 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cff5e31ca36..662f4d8046db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1551,6 +1551,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) uint8_t *keys; uint64_t hva; int srcu_idx, i, r = 0; + bool unlocked; if (args->flags != 0) return -EINVAL; @@ -1575,9 +1576,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + i = 0; down_read(¤t->mm->mmap_sem); srcu_idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < args->count; i++) { + while (i < args->count) { + unlocked = false; hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; @@ -1591,8 +1594,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) } r = set_guest_storage_key(current->mm, hva, keys[i], 0); - if (r) - break; + if (r) { + r = fixup_user_fault(current, current->mm, hva, + FAULT_FLAG_WRITE, &unlocked); + if (r) + break; + } + if (!r) + i++; } srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(¤t->mm->mmap_sem); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index eb0eb60c7be6..cfc5a62329f6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -246,9 +246,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) static int handle_iske(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long gaddr, vmaddr; unsigned char key; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_iske++; @@ -262,18 +263,28 @@ static int handle_iske(struct 
kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = get_guest_storage_key(current->mm, addr, &key); - up_read(¤t->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, vmaddr, &key); + + if (rc) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + up_read(¤t->mm->mmap_sem); vcpu->run->s.regs.gprs[reg1] &= ~0xff; vcpu->run->s.regs.gprs[reg1] |= key; return 0; @@ -281,8 +292,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) static int handle_rrbe(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long vmaddr, gaddr; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_rrbe++; @@ -296,19 +308,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = reset_guest_reference_bit(current->mm, addr); - up_read(¤t->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, vmaddr); + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - + up_read(¤t->mm->mmap_sem); kvm_s390_set_psw_cc(vcpu, rc); return 0; } @@ -323,6 +343,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) unsigned long start, end; unsigned char key, oldkey; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_sske++; @@ -355,19 +376,28 @@ static int handle_sske(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unlocked = false; - if (kvm_is_error_hva(addr)) + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? 
-EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - start += PAGE_SIZE; + + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; } if (m3 & (SSKE_MC | SSKE_MR)) { @@ -948,15 +978,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long useraddr; + unsigned long vmaddr; + bool unlocked = false; /* Translate guest address to host address */ - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); - if (kvm_is_error_hva(useraddr)) + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (clear_user((void __user *)useraddr, PAGE_SIZE)) + if (clear_user((void __user *)vmaddr, PAGE_SIZE)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } @@ -966,14 +997,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, useraddr, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - start += PAGE_SIZE; + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; + } } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { -- cgit v1.2.3-59-g8ed1b From 7d735b9ae82d073e23ff7d2648e0aa8056049d16 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 13 Jul 2018 11:28:29 +0100 Subject: s390/mm: hugetlb pages within a gmap can not be freed Guests backed by huge pages could theoretically free unused pages via the diagnose 10 instruction. We currently don't allow that, so we don't have to refault it once it's needed again. Signed-off-by: Dominik Dingel Reviewed-by: Martin Schwidefsky Reviewed-by: David Hildenbrand Signed-off-by: Janosch Frank --- arch/s390/mm/gmap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index a6738c0c4499..736ed32a83c5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -706,6 +706,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + /* + * We do not discard pages that are backed by + * hugetlbfs, so we don't have to refault them. + */ + if (vma && is_vm_hugetlb_page(vma)) + continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } -- cgit v1.2.3-59-g8ed1b From a9e00d8349c98e0973c8b0d671d69e838f7b5bcc Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:37 +0100 Subject: s390/mm: Add huge page gmap linking support Let's allow huge pmd linking when enabled through the KVM_CAP_S390_HPAGE_1M capability. Also we can now restrict gmap invalidation and notification to the cases where the capability has been activated and save some cycles when that's not the case. 
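A short sketch of the gating pattern this adds around the gmap pmd notifier calls (hypothetical helper name; the diff below open-codes the same condition in the idte local/global and csp paths):

	/*
	 * Illustration: pmd invalidations are only forwarded to the gmap
	 * code when the mm has PGSTEs and huge gmap backing was allowed
	 * via KVM_CAP_S390_HPAGE_1M.
	 */
	static inline bool sketch_gmap_pmd_notify_needed(struct mm_struct *mm)
	{
		return mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m;
	}
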
Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand --- arch/s390/include/asm/mmu.h | 2 ++ arch/s390/include/asm/mmu_context.h | 1 + arch/s390/mm/gmap.c | 9 ++++++--- arch/s390/mm/pgtable.c | 8 ++++---- 4 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index f5ff9dbad8ac..f31a15044c24 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -24,6 +24,8 @@ typedef struct { unsigned int uses_skeys:1; /* The mmu context uses CMM. */ unsigned int uses_cmm:1; + /* The gmaps associated with this context are allowed to use huge pages. */ + unsigned int allow_gmap_hpage_1m:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d16bc79c30bb..0717ee76885d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; + mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { case _REGION2_SIZE: diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 736ed32a83c5..bb44990c8212 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,8 +2,10 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016 + * Copyright IBM Corp. 2007, 2016, 2018 * Author(s): Martin Schwidefsky + * David Hildenbrand + * Janosch Frank */ #include @@ -588,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) + /* Are we allowed to use huge pages? */ + if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. 
*/ rc = radix_tree_preload(GFP_KERNEL); @@ -1632,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, unsigned long limit; int rc; + BUG_ON(parent->mm->context.allow_gmap_hpage_1m); BUG_ON(gmap_is_shadow(parent)); spin_lock(&parent->shadow_lock); sg = gmap_find_shadow(parent, asce, edat_level); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 37d68706f5aa..f2cc7da473e4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -347,7 +347,7 @@ static inline void pmdp_idte_local(struct mm_struct *mm, mm->context.asce, IDTE_LOCAL); else __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_local(mm, addr); } @@ -357,15 +357,15 @@ static inline void pmdp_idte_global(struct mm_struct *mm, if (MACHINE_HAS_TLB_GUEST) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); } else if (MACHINE_HAS_IDTE) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_idte_global(mm, addr); } else { __pmdp_csp(pmdp); - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) gmap_pmdp_csp(mm, addr); } } -- cgit v1.2.3-59-g8ed1b From a449938297e55e7e8958f8b48583f7d342da1930 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 13 Jul 2018 11:28:31 +0100 Subject: KVM: s390: Add huge page enablement control General KVM huge page support on s390 has to be enabled via the kvm.hpage module parameter. Either nested or hpage can be enabled, as we currently do not support vSIE for huge backed guests. Once the vSIE support is added we will either drop the parameter or enable it as default. For a guest the feature has to be enabled through the new KVM_CAP_S390_HPAGE_1M capability and the hpage module parameter. Enabling it means that cmm can't be enabled for the vm and disables pfmf and storage key interpretation. This is due to the fact that in some cases, in upcoming patches, we have to split huge pages in the guest mapping to be able to set more granular memory protection on 4k pages. These split pages have fake page tables that are not visible to the Linux memory management which subsequently will not manage its PGSTEs, while the SIE will. Disabling these features lets us manage PGSTE data in a consistent matter and solve that problem. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand --- Documentation/virtual/kvm/api.txt | 16 +++++++++++++++ arch/s390/kvm/kvm-s390.c | 42 +++++++++++++++++++++++++++++++++++++-- include/uapi/linux/kvm.h | 1 + 3 files changed, 57 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d10944e619d3..cb8db4f9d097 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4391,6 +4391,22 @@ all such vmexits. Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits. +7.14 KVM_CAP_S390_HPAGE_1M + +Architectures: s390 +Parameters: none +Returns: 0 on success, -EINVAL if hpage module parameter was not set + or cmma is enabled + +With this capability the KVM support for memory backing with 1m pages +through hugetlbfs can be enabled for a VM. After the capability is +enabled, cmma can't be enabled anymore and pfmfi and the storage key +interpretation are disabled. 
If cmma has already been enabled or the +hpage module parameter is not set to 1, -EINVAL is returned. + +While it is generally possible to create a huge page backed VM without +this capability, the VM will not be able to run. + 8. Other capabilities. ---------------------- diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 662f4d8046db..f9d90337e64a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -172,6 +172,10 @@ static int nested; module_param(nested, int, S_IRUGO); MODULE_PARM_DESC(nested, "Nested virtualization support"); +/* allow 1m huge page guest backing, if !nested */ +static int hpage; +module_param(hpage, int, 0444); +MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* * For now we handle at most 16 double words as this is what the s390 base @@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_AIS_MIGRATION: r = 1; break; + case KVM_CAP_S390_HPAGE_1M: + r = 0; + if (hpage) + r = 1; + break; case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; @@ -678,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_HPAGE_1M: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + r = -EBUSY; + else if (!hpage || kvm->arch.use_cmma) + r = -EINVAL; + else { + r = 0; + kvm->mm->context.allow_gmap_hpage_1m = 1; + /* + * We might have to create fake 4k page + * tables. To avoid that the hardware works on + * stale PGSTEs, we emulate these instructions. + */ + kvm->arch.use_skf = 0; + kvm->arch.use_pfmfi = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -725,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!sclp.has_cmma) break; - ret = -EBUSY; VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); - if (!kvm->created_vcpus) { + if (kvm->created_vcpus) + ret = -EBUSY; + else if (kvm->mm->context.allow_gmap_hpage_1m) + ret = -EINVAL; + else { kvm->arch.use_cmma = 1; /* Not compatible with cmma. */ kvm->arch.use_pfmfi = 0; @@ -4102,6 +4135,11 @@ static int __init kvm_s390_init(void) return -ENODEV; } + if (nested && hpage) { + pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); + return -EINVAL; + } + for (i = 0; i < 16; i++) kvm_s390_fac_base[i] |= S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b6270a3b38e9..b955b986b341 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 #define KVM_CAP_HYPERV_TLBFLUSH 155 +#define KVM_CAP_S390_HPAGE_1M 156 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-59-g8ed1b From 5223c671674c507cc9bcabdfc5a9b449c178330a Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 16 Jul 2018 11:08:06 +0200 Subject: s390/cpum_sf: save TOD clock base in SDBs for time conversion Processing the samples in the AUX-area by perf requires the computation of respective time stamps. The time stamps used by perf are based on the monotonic clock. 
To convert the TOD clock value contained in an SDB to a monotonic clock value, the TOD clock base is required. Hence, also save the TOD clock base in the SDB. Suggested-by: Thomas Richter Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 12 +++++++++--- arch/s390/kernel/perf_cpum_sf.c | 14 +++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index de023a9a88ca..bf2cbff926ef 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -2,7 +2,7 @@ /* * CPU-measurement facilities * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2018 * Author(s): Hendrik Brueckner * Jan Glauber */ @@ -139,8 +139,14 @@ struct hws_trailer_entry { unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ - unsigned long long progusage1; /* 48 - reserved for programming use */ - unsigned long long progusage2; /* */ + union { /* 48 - reserved for programming use */ + struct { + unsigned int clock_base:1; /* in progusage2 */ + unsigned long long progusage1:63; + unsigned long long progusage2; + }; + unsigned long long progusage[2]; + }; } __packed; /* Load program parameter */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0292d68e7dde..cb198d4a6dca 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2,7 +2,7 @@ /* * Performance event support for the System z CPU-measurement Sampling Facility * - * Copyright IBM Corp. 2013 + * Copyright IBM Corp. 2013, 2018 * Author(s): Hendrik Brueckner */ #define KMSG_COMPONENT "cpum_sf" @@ -1587,6 +1587,17 @@ static void aux_buffer_free(void *data) "%lu SDBTs\n", num_sdbt); } +static void aux_sdb_init(unsigned long sdb) +{ + struct hws_trailer_entry *te; + + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + + /* Save clock base */ + te->clock_base = 1; + memcpy(&te->progusage2, &tod_clock_base[1], 8); +} + /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling * @cpu: On which to allocate, -1 means current @@ -1666,6 +1677,7 @@ static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, /* Tail is the entry in a SDBT */ *tail = (unsigned long)pages[i]; aux->sdb_index[i] = (unsigned long)pages[i]; + aux_sdb_init((unsigned long)pages[i]); } sfb->num_sdb = nr_pages; -- cgit v1.2.3-59-g8ed1b From 8cce437fbb5c1f2af2f63834fa05082596beca5d Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Thu, 19 Jul 2018 11:13:45 +0200 Subject: s390/kdump: Fix elfcorehdr size calculation Before the memory for the elfcorehdr is allocated the required size is estimated with alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + mem_chunk_cnt * sizeof(Elf64_Phdr); Where 0x4a0 is used as size for the ELF notes to store the register contend. This size is 8 bytes too small. Usually this does not immediately cause a problem because the page reserved for overhead (Elf_Ehdr, vmcoreinfo, etc.) is pretty generous. So usually there is enough spare memory to counter the mis-calculated per cpu size. However, with growing overhead and/or a huge cpu count the allocated size gets too small for the elfcorehdr. Ultimately a BUG_ON is triggered causing the crash kernel to panic. Fix this by properly calculating the required size instead of relying on magic numbers. 
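For illustration, a stand-alone sketch of the per-note size calculation that replaces the 0x4a0 magic number; it mirrors the nt_size_name() helper in the diff below, and the 4-byte alignment of name and descriptor is required by the ELF note format:

	#include <elf.h>
	#include <string.h>

	/* Hypothetical user-space version of the per-note size calculation. */
	static size_t sketch_elf_note_size(const char *name, size_t desc_len)
	{
		size_t size = sizeof(Elf64_Nhdr);

		size += (strlen(name) + 1 + 3) & ~(size_t)3;	/* name + NUL, padded */
		size += (desc_len + 3) & ~(size_t)3;		/* descriptor, padded */
		return size;
	}
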
Fixes: a62bc07392539 ("s390/kdump: add support for vector extension") Signed-off-by: Philipp Rudo Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/crash_dump.c | 104 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f5ea9d87069..c3620bafc374 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -306,6 +306,15 @@ static void *kzalloc_panic(int len) return rc; } +static const char *nt_name(Elf64_Word type) +{ + const char *name = "LINUX"; + + if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) + name = KEXEC_CORE_NOTE_NAME; + return name; +} + /* * Initialize ELF note */ @@ -332,11 +341,26 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - const char *note_name = "LINUX"; + return nt_init_name(buf, type, desc, d_len, nt_name(type)); +} + +/* + * Calculate the size of ELF note + */ +static size_t nt_size_name(int d_len, const char *name) +{ + size_t size; - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - note_name = KEXEC_CORE_NOTE_NAME; - return nt_init_name(buf, type, desc, d_len, note_name); + size = sizeof(Elf64_Nhdr); + size += roundup(strlen(name) + 1, 4); + size += roundup(d_len, 4); + + return size; +} + +static inline size_t nt_size(Elf64_Word type, int d_len) +{ + return nt_size_name(d_len, nt_name(type)); } /* @@ -374,6 +398,29 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) return ptr; } +/* + * Calculate size of ELF notes per cpu + */ +static size_t get_cpu_elf_notes_size(void) +{ + struct save_area *sa = NULL; + size_t size; + + size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); + size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); + size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); + size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); + size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); + size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); + size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); + size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + } + + return size; +} + /* * Initialize prpsinfo note (new kernel) */ @@ -429,6 +476,30 @@ static void *nt_vmcoreinfo(void *ptr) return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } +static size_t nt_vmcoreinfo_size(void) +{ + const char *name = "VMCOREINFO"; + char nt_name[11]; + Elf64_Nhdr note; + void *addr; + + if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return 0; + + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) + return 0; + + memset(nt_name, 0, sizeof(nt_name)); + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) + return 0; + + if (strcmp(nt_name, name) != 0) + return 0; + + return nt_size_name(note.n_descsz, name); +} + /* * Initialize final note (needed for /proc/vmcore code) */ @@ -539,6 +610,27 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) return ptr; } +static size_t get_elfcorehdr_size(int mem_chunk_cnt) +{ + size_t size; + + size = sizeof(Elf64_Ehdr); + /* PT_NOTES */ + size += sizeof(Elf64_Phdr); + /* nt_prpsinfo */ + size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + /* regsets */ + size += get_cpu_cnt() * get_cpu_elf_notes_size(); + /* nt_vmcoreinfo */ 
+ size += nt_vmcoreinfo_size(); + /* nt_final */ + size += sizeof(Elf64_Nhdr); + /* PT_LOADS */ + size += mem_chunk_cnt * sizeof(Elf64_Phdr); + + return size; +} + /* * Create ELF core header (new kernel) */ @@ -566,8 +658,8 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + - mem_chunk_cnt * sizeof(Elf64_Phdr); + alloc_size = get_elfcorehdr_size(mem_chunk_cnt); + hdr = kzalloc_panic(alloc_size); /* Init elf header */ ptr = ehdr_init(hdr, mem_chunk_cnt); -- cgit v1.2.3-59-g8ed1b From fb7d7518b0d65955f91c7b875c36eae7694c69bd Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 31 Jul 2018 16:14:18 +0200 Subject: s390/numa: move initial setup of node_to_cpumask_map The numa_init_early initcall sets the node_to_cpumask_map[0] to the full cpu_possible_mask. Unfortunately this early_initcall is too late; the NUMA setup for numa=emu is done even earlier. The order of calls is numa_setup() -> emu_update_cpu_topology(), then the early_initcalls(), followed by sched_init_domains(). Starting with git commit 051f3ca02e46432c0965e8948f00c07d8a2f09c0 "sched/topology: Introduce NUMA identity node sched domain" the incorrect node_to_cpumask_map[0] breaks the domain setup and the kernel panics with an oops. Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Martin Schwidefsky --- arch/s390/numa/numa.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 06a80434cfe6..5bd374491f94 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -134,26 +134,14 @@ void __init numa_setup(void) { pr_info("NUMA mode: %s\n", mode->name); nodes_clear(node_possible_map); + /* Initially attach all possible CPUs to node 0. */ + cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); if (mode->setup) mode->setup(); numa_setup_memory(); memblock_dump_all(); } -/* - * numa_init_early() - Initialization initcall - * - * This runs when only one CPU is online and before the first - * topology update is called for by the scheduler. - */ -static int __init numa_init_early(void) -{ - /* Attach all possible CPUs to node 0 for now. */ - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); - return 0; -} -early_initcall(numa_init_early); - /* * numa_init_late() - Initialization initcall * -- cgit v1.2.3-59-g8ed1b From 5eda25b10297684c1f46a14199ec00210f3c346e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Aug 2018 13:49:47 +0200 Subject: s390/lib: use expoline for all bcr instructions The memmove, memset, memcpy, __memset16, __memset32 and __memset64 functions have an additional indirect return branch in the form of a "bzr" instruction. These need to use expolines as well.
Cc: stable@vger.kernel.org # v4.17+ Fixes: 97489e0663 ("s390/lib: use expoline for indirect branches") Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/lib/mem.S | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 2311f15be9cf..40c4d59c926e 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -17,7 +17,7 @@ ENTRY(memmove) ltgr %r4,%r4 lgr %r1,%r2 - bzr %r14 + jz .Lmemmove_exit aghi %r4,-1 clgr %r2,%r3 jnh .Lmemmove_forward @@ -36,6 +36,7 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) +.Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) @@ -65,7 +66,7 @@ EXPORT_SYMBOL(memmove) */ ENTRY(memset) ltgr %r4,%r4 - bzr %r14 + jz .Lmemset_exit ltgr %r3,%r3 jnz .Lmemset_fill aghi %r4,-1 @@ -80,6 +81,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) +.Lmemset_exit: BR_EX %r14 .Lmemset_fill: cghi %r4,1 @@ -115,7 +117,7 @@ EXPORT_SYMBOL(memset) */ ENTRY(memcpy) ltgr %r4,%r4 - bzr %r14 + jz .Lmemcpy_exit aghi %r4,-1 srlg %r5,%r4,8 ltgr %r5,%r5 @@ -124,6 +126,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) +.Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) @@ -145,9 +148,9 @@ EXPORT_SYMBOL(memcpy) .macro __MEMSET bits,bytes,insn ENTRY(__memset\bits) ltgr %r4,%r4 - bzr %r14 + jz .L__memset_exit\bits cghi %r4,\bytes - je .L__memset_exit\bits + je .L__memset_store\bits aghi %r4,-(\bytes+1) srlg %r5,%r4,8 ltgr %r5,%r5 @@ -163,8 +166,9 @@ ENTRY(__memset\bits) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) BR_EX %r14 -.L__memset_exit\bits: +.L__memset_store\bits: \insn %r3,0(%r2) +.L__memset_exit\bits: BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) -- cgit v1.2.3-59-g8ed1b From 26f843848bae973817b3587780ce6b7b0200d3e4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Aug 2018 14:26:39 +0200 Subject: s390: fix br_r1_trampoline for machines without exrl For machines without the exrl instruction the BPF JIT generates code that uses an "br %r1" instruction located in the lowcore page. Unfortunately a cut & paste error put an additional "larl %r1,.+14" instruction into the code, which clobbers the branch target address in %r1. Remove the larl instruction. Cc: stable@vger.kernel.org # v4.17+ Fixes: de5cb6eb51 ("s390: use expoline thunks in the BPF JIT") Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index d2db8acb1a55..9d4399a87fa2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -485,8 +485,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) /* br %r1 */ _EMIT2(0x07f1); } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); /* ex 0,S390_lowcore.br_r1_tampoline */ EMIT4_DISP(0x44000000, REG_0, REG_0, offsetof(struct lowcore, br_r1_trampoline)); -- cgit v1.2.3-59-g8ed1b From 37a366face294facb9c9d9fdd9f5b64a27456cbd Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 7 Aug 2018 18:57:11 +0200 Subject: s390/mm: fix addressing exception after suspend/resume Commit c9b5ad546e7d "s390/mm: tag normal pages vs pages used in page tables" accidentally changed the logic in arch_set_page_states(), which is used by the suspend/resume code. set_page_stable(page, order) was changed to set_page_stable_dat(page, 0).
After this, only the first page of higher order pages will be set to stable, and a write to one of the unstable pages will result in an addressing exception. Fix this by using "order" again, instead of "0". Fixes: c9b5ad546e7d ("s390/mm: tag normal pages vs pages used in page tables") Cc: stable@vger.kernel.org # 4.14+ Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/page-states.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 382153ff17e3..dc3cede7f2ec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable_dat(page, 0); + set_page_stable_dat(page, order); else set_page_unused(page, order); } -- cgit v1.2.3-59-g8ed1b
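To illustrate why passing 0 instead of order only covers part of a higher-order block: the page-state helpers operate on all 2^order pages of the block they are handed, so a block of order 2 spans four pages while an order of 0 touches only the first one. The stand-alone C sketch below mirrors that arithmetic; mark_block_stable() is a hypothetical stand-in for illustration, not the kernel's set_page_stable_dat() implementation.

#include <stdio.h>

/*
 * Hypothetical stand-in for a page-state helper: a buddy block of
 * order N spans 1 << N pages, so calling it with order 0 for a
 * larger block marks only the block's first page.
 */
static void mark_block_stable(unsigned long first_pfn, unsigned int order)
{
	unsigned long i, nr_pages = 1UL << order;

	for (i = 0; i < nr_pages; i++)
		printf("pfn 0x%lx set to stable\n", first_pfn + i);
}

int main(void)
{
	mark_block_stable(0x1000, 0);	/* order 0: marks only pfn 0x1000 */
	mark_block_stable(0x1000, 2);	/* order 2: marks pfns 0x1000-0x1003 */
	return 0;
}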