aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/s390/boot
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/boot')
-rw-r--r--arch/s390/boot/.gitignore1
-rw-r--r--arch/s390/boot/Makefile45
-rw-r--r--arch/s390/boot/als.c49
-rw-r--r--arch/s390/boot/alternative.c138
-rw-r--r--arch/s390/boot/boot.h53
-rw-r--r--arch/s390/boot/decompressor.c25
-rw-r--r--arch/s390/boot/decompressor.h8
-rw-r--r--arch/s390/boot/head.S29
-rwxr-xr-xarch/s390/boot/install.sh2
-rw-r--r--arch/s390/boot/ipl_parm.c61
-rw-r--r--arch/s390/boot/ipl_report.c5
-rw-r--r--arch/s390/boot/kaslr.c6
-rw-r--r--arch/s390/boot/kmsan.c6
-rw-r--r--arch/s390/boot/pgm_check.c92
-rw-r--r--arch/s390/boot/pgm_check_info.c179
-rw-r--r--arch/s390/boot/physmem_info.c212
-rw-r--r--arch/s390/boot/printk.c299
-rw-r--r--arch/s390/boot/startup.c512
-rw-r--r--arch/s390/boot/string.c28
-rw-r--r--arch/s390/boot/uv.c15
-rw-r--r--arch/s390/boot/uv.h13
-rw-r--r--arch/s390/boot/vmem.c280
-rw-r--r--arch/s390/boot/vmlinux.lds.S36
23 files changed, 1377 insertions, 717 deletions
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index f5ef099e2fd3..af2a6a7bc028 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -5,4 +5,5 @@ relocs.S
section_cmp.*
vmlinux
vmlinux.lds
+vmlinux.map
vmlinux.syms
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 294f08a8811a..bee49626be4b 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -3,55 +3,42 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
+# Tooling runtimes are unavailable and cannot be linked for early boot code
KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
-
-KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
-KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
+KMSAN_SANITIZE := n
#
-# Use minimum architecture for als.c to be able to print an error
+# Use minimum architecture level so it is possible to print an error
# message if the kernel is started on a machine which is too old
#
-ifndef CONFIG_CC_IS_CLANG
-CC_FLAGS_MARCH_MINIMUM := -march=z900
-else
CC_FLAGS_MARCH_MINIMUM := -march=z10
-endif
-
-ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM))
-AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
-AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM)
-AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH)
-AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM)
-CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
-CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM)
-CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
-CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM)
-endif
+
+KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR))
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR))
+KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
+KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
-obj-y += $(if $(CONFIG_PIE_BUILD),machine_kexec_reloc.o,relocs.o)
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
+obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o
+obj-y += uv.o printk.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-$(CONFIG_KMSAN) += kmsan.o
obj-all := $(obj-y) piggy.o syms.o
targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
-ifndef CONFIG_PIE_BUILD
targets += relocs.S
-endif
OBJECTS := $(addprefix $(obj)/,$(obj-y))
OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
@@ -110,13 +97,9 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-ifndef CONFIG_PIE_BUILD
-CMD_RELOCS=arch/s390/tools/relocs
-quiet_cmd_relocs = RELOCS $@
- cmd_relocs = $(CMD_RELOCS) $< > $@
-$(obj)/relocs.S: vmlinux FORCE
- $(call if_changed,relocs)
-endif
+# relocs.S is created by the vmlinux postlink step.
+$(obj)/relocs.S: vmlinux
+ @true
suffix-$(CONFIG_KERNEL_GZIP) := .gz
suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index 47c48fbfb563..79afb5fa7f1f 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -9,42 +9,8 @@
#include <asm/sclp.h>
#include "boot.h"
-/*
- * The code within this file will be called very early. It may _not_
- * access anything within the bss section, since that is not cleared
- * yet and may contain data (e.g. initrd) that must be saved by other
- * code.
- * For temporary objects the stack (16k) should be used.
- */
-
static unsigned long als[] = { FACILITIES_ALS };
-static void u16_to_hex(char *str, u16 val)
-{
- int i, num;
-
- for (i = 1; i <= 4; i++) {
- num = (val >> (16 - 4 * i)) & 0xf;
- if (num >= 10)
- num += 7;
- *str++ = '0' + num;
- }
- *str = '\0';
-}
-
-static void print_machine_type(void)
-{
- static char mach_str[80] = "Detected machine-type number: ";
- char type_str[5];
- struct cpuid id;
-
- get_cpu_id(&id);
- u16_to_hex(type_str, id.machine);
- strcat(mach_str, type_str);
- strcat(mach_str, "\n");
- sclp_early_printk(mach_str);
-}
-
static void u16_to_decimal(char *str, u16 val)
{
int div = 1;
@@ -80,8 +46,7 @@ void print_missing_facilities(void)
* z/VM adds a four character prefix.
*/
if (strlen(als_str) > 70) {
- strcat(als_str, "\n");
- sclp_early_printk(als_str);
+ boot_emerg("%s\n", als_str);
*als_str = '\0';
}
u16_to_decimal(val_str, i * BITS_PER_LONG + j);
@@ -89,16 +54,18 @@ void print_missing_facilities(void)
first = 0;
}
}
- strcat(als_str, "\n");
- sclp_early_printk(als_str);
+ boot_emerg("%s\n", als_str);
}
static void facility_mismatch(void)
{
- sclp_early_printk("The Linux kernel requires more recent processor hardware\n");
- print_machine_type();
+ struct cpuid id;
+
+ get_cpu_id(&id);
+ boot_emerg("The Linux kernel requires more recent processor hardware\n");
+ boot_emerg("Detected machine-type number: %4x\n", id.machine);
print_missing_facilities();
- sclp_early_printk("See Principles of Operations for facility bits\n");
+ boot_emerg("See Principles of Operations for facility bits\n");
disabled_wait();
}
diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c
new file mode 100644
index 000000000000..19ea7934b918
--- /dev/null
+++ b/arch/s390/boot/alternative.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "alt: " fmt
+#include "boot.h"
+
+#define a_debug boot_debug
+
+#include "../kernel/alternative.c"
+
+static void alt_debug_all(int type)
+{
+ int i;
+
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++)
+ alt_debug.facilities[i] = -1UL;
+ break;
+ case ALT_TYPE_FEATURE:
+ for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++)
+ alt_debug.mfeatures[i] = -1UL;
+ break;
+ case ALT_TYPE_SPEC:
+ alt_debug.spec = 1;
+ break;
+ }
+}
+
+static void alt_debug_modify(int type, unsigned int nr, bool clear)
+{
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ if (clear)
+ __clear_facility(nr, alt_debug.facilities);
+ else
+ __set_facility(nr, alt_debug.facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ if (clear)
+ __clear_machine_feature(nr, alt_debug.mfeatures);
+ else
+ __set_machine_feature(nr, alt_debug.mfeatures);
+ break;
+ }
+}
+
+static char *alt_debug_parse(int type, char *str)
+{
+ unsigned long val, endval;
+ char *endp;
+ bool clear;
+ int i;
+
+ if (*str == ':') {
+ str++;
+ } else {
+ alt_debug_all(type);
+ return str;
+ }
+ clear = false;
+ if (*str == '!') {
+ alt_debug_all(type);
+ clear = true;
+ str++;
+ }
+ while (*str) {
+ val = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ if (*str == '-') {
+ str++;
+ endval = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ while (val <= endval) {
+ alt_debug_modify(type, val, clear);
+ val++;
+ }
+ } else {
+ alt_debug_modify(type, val, clear);
+ }
+ if (*str != ',')
+ break;
+ str++;
+ }
+ return str;
+}
+
+/*
+ * Use debug-alternative command line parameter for debugging:
+ * "debug-alternative"
+ * -> print debug message for every single alternative
+ *
+ * "debug-alternative=0;2"
+ * -> print debug message for all alternatives with type 0 and 2
+ *
+ * "debug-alternative=0:0-7"
+ * -> print debug message for all alternatives with type 0 and with
+ * facility numbers within the range of 0-7
+ * (if type 0 is ALT_TYPE_FACILITY)
+ *
+ * "debug-alternative=0:!8;1"
+ * -> print debug message for all alternatives with type 0, for all
+ * facility number, except facility 8, and in addition print all
+ * alternatives with type 1
+ */
+void alt_debug_setup(char *str)
+{
+ unsigned long type;
+ char *endp;
+ int i;
+
+ if (!str) {
+ alt_debug_all(ALT_TYPE_FACILITY);
+ alt_debug_all(ALT_TYPE_FEATURE);
+ alt_debug_all(ALT_TYPE_SPEC);
+ return;
+ }
+ while (*str) {
+ type = simple_strtoull(str, &endp, 0);
+ if (str == endp)
+ break;
+ str = endp;
+ switch (type) {
+ case ALT_TYPE_FACILITY:
+ case ALT_TYPE_FEATURE:
+ str = alt_debug_parse(type, str);
+ break;
+ case ALT_TYPE_SPEC:
+ alt_debug_all(ALT_TYPE_SPEC);
+ break;
+ }
+ if (*str != ';')
+ break;
+ str++;
+ }
+}
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 567d60f78bbc..e045cae6e80a 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,16 +8,10 @@
#ifndef __ASSEMBLY__
+#include <linux/printk.h>
#include <asm/physmem_info.h>
-struct machine_info {
- unsigned char has_edat1 : 1;
- unsigned char has_edat2 : 1;
- unsigned char has_nx : 1;
-};
-
struct vmlinux_info {
- unsigned long default_lma;
unsigned long entry;
unsigned long image_size; /* does not include .bss */
unsigned long bss_size; /* uncompressed image .bss size */
@@ -25,18 +19,14 @@ struct vmlinux_info {
unsigned long bootdata_size;
unsigned long bootdata_preserved_off;
unsigned long bootdata_preserved_size;
-#ifdef CONFIG_PIE_BUILD
- unsigned long dynsym_start;
- unsigned long rela_dyn_start;
- unsigned long rela_dyn_end;
-#else
unsigned long got_start;
unsigned long got_end;
-#endif
unsigned long amode31_size;
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
unsigned long invalid_pg_dir_off;
+ unsigned long alt_instructions;
+ unsigned long alt_instructions_end;
#ifdef CONFIG_KASAN
unsigned long kasan_early_shadow_page_off;
unsigned long kasan_early_shadow_pte_off;
@@ -53,13 +43,16 @@ void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align);
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align);
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom);
unsigned long get_physmem_alloc_pos(void);
+void dump_physmem_reserved(void);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
unsigned long *intersection_start);
bool is_ipl_block_dump(void);
@@ -71,15 +64,33 @@ void parse_boot_command_line(void);
void verify_facilities(void);
void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
-void print_pgm_check_info(void);
+void alt_debug_setup(char *str);
+void do_pgm_check(struct pt_regs *regs);
unsigned long randomize_within_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max);
-void setup_vmem(unsigned long asce_limit);
-void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
+int __printf(1, 2) boot_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);
+int get_random(unsigned long limit, unsigned long *value);
+void boot_rb_dump(void);
+
+#ifndef boot_fmt
+#define boot_fmt(fmt) fmt
+#endif
+
+#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__)
+#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__)
extern struct machine_info machine;
+extern int boot_console_loglevel;
+extern bool boot_ignore_loglevel;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
@@ -95,9 +106,15 @@ extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
+
#define vmlinux _vmlinux_info
+#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore))
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
+#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
+#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)
+#define __identity_va(x) ((void *)((unsigned long)(x) + __identity_base))
+#define __identity_pa(x) ((unsigned long)(x) - __identity_base)
static inline bool intersects(unsigned long addr0, unsigned long size0,
unsigned long addr1, unsigned long size1)
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index d762733a0753..03500b9d9fb9 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
+#include <asm/boot_data.h>
#include <asm/page.h>
#include "decompressor.h"
#include "boot.h"
@@ -63,24 +64,22 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c"
#endif
-#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
+static void decompress_error(char *m)
+{
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Decompression error: %s\n", m);
+ boot_emerg(" -- System halted\n");
+ disabled_wait();
+}
unsigned long mem_safe_offset(void)
{
- /*
- * due to 4MB HEAD_SIZE for bzip2
- * 'decompress_offset + vmlinux.image_size' could be larger than
- * kernel at final position + its .bss, so take the larger of two
- */
- return max(decompress_offset + vmlinux.image_size,
- vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size);
+ return ALIGN(free_mem_end_ptr, PAGE_SIZE);
}
-void *decompress_kernel(void)
+void deploy_kernel(void *output)
{
- void *output = (void *)decompress_offset;
-
__decompress(_compressed_start, _compressed_end - _compressed_start,
- NULL, NULL, output, vmlinux.image_size, NULL, error);
- return output;
+ NULL, NULL, output, vmlinux.image_size, NULL, decompress_error);
}
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
index 92b81d2ea35d..4f966f06bd65 100644
--- a/arch/s390/boot/decompressor.h
+++ b/arch/s390/boot/decompressor.h
@@ -2,11 +2,9 @@
#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
#define BOOT_COMPRESSED_DECOMPRESSOR_H
-#ifdef CONFIG_KERNEL_UNCOMPRESSED
-static inline void *decompress_kernel(void) { return NULL; }
-#else
-void *decompress_kernel(void);
-#endif
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
unsigned long mem_safe_offset(void);
+void deploy_kernel(void *output);
+#endif
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index 637c29c3f6e3..0b511d5c030b 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal)
xc 0xf00(256),0xf00
larl %r13,.Lctl
lctlg %c0,%c15,0(%r13) # load control registers
- stcke __LC_BOOT_CLOCK
- mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
+ larl %r13,tod_clock_base
+ stcke 0(%r13)
+ mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13)
larl %r13,6f
spt 0(%r13)
mvc __LC_LAST_UPDATE_TIMER(8),0(%r13)
@@ -292,18 +293,12 @@ SYM_CODE_END(startup_normal)
#include "head_kdump.S"
-#
-# This program check is active immediately after kernel start
-# and until early_pgm_check_handler is set in kernel/early.c
-# It simply saves general/control registers and psw in
-# the save area and does disabled wait with a faulty address.
-#
SYM_CODE_START_LOCAL(startup_pgm_check_handler)
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ stmg %r8,%r15,__LC_SAVE_AREA
la %r8,4095
stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8)
stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8)
- mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC
+ mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA
mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW
mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW
ni __LC_RETURN_PSW,0xfc # remove IO and EX bits
@@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler)
oi __LC_RETURN_PSW+1,0x2 # set wait state bit
larl %r9,.Lold_psw_disabled_wait
stg %r9,__LC_PGM_NEW_PSW+8
- larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD
- brasl %r14,print_pgm_check_info
+ larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
+ mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8)
+ mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8)
+ mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK
+ mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE
+ brasl %r14,do_pgm_check
+ larl %r9,startup_pgm_check_handler
+ stg %r9,__LC_PGM_NEW_PSW+8
+ mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ lpswe __LC_RETURN_PSW
.Lold_psw_disabled_wait:
la %r8,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8)
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index a13dd2f2aa1c..fa41486258ee 100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -15,6 +15,8 @@
# $3 - kernel map file
# $4 - default install path (blank if root directory)
+set -e
+
echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
"bootloader config required" >&2
if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index b24de9aabf7d..f584d7da29cb 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,7 +3,9 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
#include <asm/page-states.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
@@ -33,29 +35,14 @@ int vmalloc_size_set;
static inline int __diag308(unsigned long subcode, void *addr)
{
- unsigned long reg1, reg2;
- union register_pair r1;
- psw_t old;
-
- r1.even = (unsigned long) addr;
- r1.odd = 0;
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 };
+
+ asm_inline volatile(
" diag %[r1],%[subcode],0x308\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [r1] "+&d" (r1.pair),
- [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [subcode] "d" (subcode),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+ "0:\n"
+ EX_TABLE(0b, 0b)
+ : [r1] "+d" (r1.pair)
+ : [subcode] "d" (subcode)
: "cc", "memory");
return r1.odd;
}
@@ -192,7 +179,7 @@ void setup_boot_command_line(void)
if (has_ebcdic_char(parmarea.command_line))
EBCASC(parmarea.command_line, COMMAND_LINE_SIZE);
/* copy arch command line */
- strcpy(early_command_line, strim(parmarea.command_line));
+ strscpy(early_command_line, strim(parmarea.command_line));
/* append IPL PARM data to the boot command line */
if (!is_prot_virt_guest() && ipl_block_valid)
@@ -214,7 +201,7 @@ static void check_cleared_facilities(void)
for (i = 0; i < ARRAY_SIZE(als); i++) {
if ((stfle_fac_list[i] & als[i]) != als[i]) {
- sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n");
+ boot_emerg("The Linux kernel requires facilities cleared via command line option\n");
print_missing_facilities();
break;
}
@@ -266,7 +253,8 @@ void parse_boot_command_line(void)
int rc;
__kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
- args = strcpy(command_line_buf, early_command_line);
+ strscpy(command_line_buf, early_command_line);
+ args = command_line_buf;
while (*args) {
args = next_arg(args, &param, &val);
@@ -294,6 +282,9 @@ void parse_boot_command_line(void)
if (!strcmp(param, "facilities") && val)
modify_fac_list(val);
+ if (!strcmp(param, "debug-alternative"))
+ alt_debug_setup(val);
+
if (!strcmp(param, "nokaslr"))
__kaslr_enabled = 0;
@@ -310,5 +301,25 @@ void parse_boot_command_line(void)
prot_virt_host = 1;
}
#endif
+ if (!strcmp(param, "relocate_lowcore") && test_facility(193))
+ set_machine_feature(MFEATURE_LOWCORE);
+ if (!strcmp(param, "earlyprintk"))
+ boot_earlyprintk = true;
+ if (!strcmp(param, "debug"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
+ if (!strcmp(param, "bootdebug")) {
+ bootdebug = true;
+ if (val)
+ strscpy(bootdebug_filter, val);
+ }
+ if (!strcmp(param, "quiet"))
+ boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET;
+ if (!strcmp(param, "ignore_loglevel"))
+ boot_ignore_loglevel = true;
+ if (!strcmp(param, "loglevel")) {
+ boot_console_loglevel = simple_strtoull(val, NULL, 10);
+ if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN)
+ boot_console_loglevel = CONSOLE_LOGLEVEL_MIN;
+ }
}
}
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 1803035e68d2..f73cd757a5f7 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void)
{
struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp;
- size_t size;
/*
* Find the length for the IPL report boot data
@@ -106,7 +105,7 @@ int read_ipl_report(void)
* the IPL parameter list, then align the address to a double
* word boundary.
*/
- tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr;
pl_hdr = (struct ipl_pl_hdr *) tmp;
tmp = (tmp + pl_hdr->len + 7) & -8UL;
rl_hdr = (struct ipl_rl_hdr *) tmp;
@@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void)
return;
size = get_cert_comp_list_size();
- early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+ early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int));
ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
copy_components_bootdata();
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 90602101e2ae..941f4c9e27cc 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -32,7 +32,7 @@ struct prng_parm {
static int check_prng(void)
{
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
- sclp_early_printk("KASLR disabled: CPU has no PRNG\n");
+ boot_warn("KASLR disabled: CPU has no PRNG\n");
return 0;
}
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -43,7 +43,7 @@ static int check_prng(void)
return PRNG_MODE_TDES;
}
-static int get_random(unsigned long limit, unsigned long *value)
+int get_random(unsigned long limit, unsigned long *value)
{
struct prng_parm prng = {
/* initial parameter block for tdes mode, copied from libica */
@@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a
* cannot have chains.
*
* On the other hand, "dynamic" or "repetitive" allocations are done via
- * physmem_alloc_top_down(). These allocations are tightly packed together
+ * physmem_alloc_or_die(). These allocations are tightly packed together
* top down from the end of online memory. physmem_alloc_pos represents
* current position where those allocations start.
*
diff --git a/arch/s390/boot/kmsan.c b/arch/s390/boot/kmsan.c
new file mode 100644
index 000000000000..e7b3ac48143e
--- /dev/null
+++ b/arch/s390/boot/kmsan.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kmsan-checks.h>
+
+void kmsan_unpoison_memory(const void *address, size_t size)
+{
+}
diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c
new file mode 100644
index 000000000000..fa621fa5bc02
--- /dev/null
+++ b/arch/s390/boot/pgm_check.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+void print_stacktrace(unsigned long sp)
+{
+ struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
+ (unsigned long)_stack_end };
+ bool first = true;
+
+ boot_emerg("Call Trace:\n");
+ while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
+ struct stack_frame *sf = (struct stack_frame *)sp;
+
+ if (first)
+ boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ else
+ boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]);
+ if (sf->back_chain <= sp)
+ break;
+ sp = sf->back_chain;
+ first = false;
+ }
+}
+
+extern struct exception_table_entry __start___ex_table[];
+extern struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->insn + x->insn;
+}
+
+static bool ex_handler(struct pt_regs *regs)
+{
+ const struct exception_table_entry *ex;
+
+ for (ex = __start___ex_table; ex < __stop___ex_table; ex++) {
+ if (extable_insn(ex) != regs->psw.addr)
+ continue;
+ if (ex->type != EX_TYPE_FIXUP)
+ return false;
+ regs->psw.addr = extable_fixup(ex);
+ return true;
+ }
+ return false;
+}
+
+void do_pgm_check(struct pt_regs *regs)
+{
+ struct psw_bits *psw = &psw_bits(regs->psw);
+ unsigned long *gpregs = regs->gprs;
+
+ if (ex_handler(regs))
+ return;
+ if (bootdebug)
+ boot_rb_dump();
+ boot_emerg("Linux version %s\n", kernel_version);
+ if (!is_prot_virt_guest() && early_command_line[0])
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Kernel fault: interruption code %04x ilc:%d\n",
+ regs->int_code & 0xffff, regs->int_code >> 17);
+ if (kaslr_enabled()) {
+ boot_emerg("Kernel random base: %lx\n", __kaslr_offset);
+ boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+ }
+ boot_emerg("PSW : %016lx %016lx (%pS)\n",
+ regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr);
+ boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
+ psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
+ psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba);
+ boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
+ boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
+ print_stacktrace(gpregs[15]);
+ boot_emerg("Last Breaking-Event-Address:\n");
+ boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break);
+ /* Convert to disabled wait PSW */
+ psw->io = 0;
+ psw->ext = 0;
+ psw->wait = 1;
+}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
deleted file mode 100644
index 97244cd7a206..000000000000
--- a/arch/s390/boot/pgm_check_info.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/stdarg.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <asm/stacktrace.h>
-#include <asm/boot_data.h>
-#include <asm/lowcore.h>
-#include <asm/setup.h>
-#include <asm/sclp.h>
-#include <asm/uv.h>
-#include "boot.h"
-
-const char hex_asc[] = "0123456789abcdef";
-
-static char *as_hex(char *dst, unsigned long val, int pad)
-{
- char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
-
- for (*p-- = 0; p >= dst; val >>= 4)
- *p-- = hex_asc[val & 0x0f];
- return end;
-}
-
-static char *symstart(char *p)
-{
- while (*p)
- p--;
- return p + 1;
-}
-
-static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
-{
- /* symbol entries are in a form "10000 c4 startup\0" */
- char *a = _decompressor_syms_start;
- char *b = _decompressor_syms_end;
- unsigned long start;
- unsigned long size;
- char *pivot;
- char *endp;
-
- while (a < b) {
- pivot = symstart(a + (b - a) / 2);
- start = simple_strtoull(pivot, &endp, 16);
- size = simple_strtoull(endp + 1, &endp, 16);
- if (ip < start) {
- b = pivot;
- continue;
- }
- if (ip > start + size) {
- a = pivot + strlen(pivot) + 1;
- continue;
- }
- *off = ip - start;
- *len = size;
- return endp + 1;
- }
- return NULL;
-}
-
-static noinline char *strsym(void *ip)
-{
- static char buf[64];
- unsigned short off;
- unsigned short len;
- char *p;
-
- p = findsym((unsigned long)ip, &off, &len);
- if (p) {
- strncpy(buf, p, sizeof(buf));
- /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
- p = buf + strnlen(buf, sizeof(buf) - 15);
- strcpy(p, "+0x");
- p = as_hex(p + 3, off, 0);
- strcpy(p, "/0x");
- as_hex(p + 3, len, 0);
- } else {
- as_hex(buf, (unsigned long)ip, 16);
- }
- return buf;
-}
-
-void decompressor_printk(const char *fmt, ...)
-{
- char buf[1024] = { 0 };
- char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
- unsigned long pad;
- char *p = buf;
- va_list args;
-
- va_start(args, fmt);
- for (; p < end && *fmt; fmt++) {
- if (*fmt != '%') {
- *p++ = *fmt;
- continue;
- }
- pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0;
- switch (*fmt) {
- case 's':
- p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf));
- break;
- case 'p':
- if (*++fmt != 'S')
- goto out;
- p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf));
- break;
- case 'l':
- if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned long), pad);
- break;
- case 'x':
- if (end - p <= max(sizeof(int) * 2, pad))
- goto out;
- p = as_hex(p, va_arg(args, unsigned int), pad);
- break;
- default:
- goto out;
- }
- }
-out:
- va_end(args);
- sclp_early_printk(buf);
-}
-
-void print_stacktrace(unsigned long sp)
-{
- struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
- (unsigned long)_stack_end };
- bool first = true;
-
- decompressor_printk("Call Trace:\n");
- while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) {
- struct stack_frame *sf = (struct stack_frame *)sp;
-
- decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" :
- " sp:%016lx [<%016lx>] %pS\n",
- sp, sf->gprs[8], (void *)sf->gprs[8]);
- if (sf->back_chain <= sp)
- break;
- sp = sf->back_chain;
- first = false;
- }
-}
-
-void print_pgm_check_info(void)
-{
- unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area;
- struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area);
-
- decompressor_printk("Linux version %s\n", kernel_version);
- if (!is_prot_virt_guest() && early_command_line[0])
- decompressor_printk("Kernel command line: %s\n", early_command_line);
- decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
- S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
- if (kaslr_enabled())
- decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
- decompressor_printk("PSW : %016lx %016lx (%pS)\n",
- S390_lowcore.psw_save_area.mask,
- S390_lowcore.psw_save_area.addr,
- (void *)S390_lowcore.psw_save_area.addr);
- decompressor_printk(
- " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
- psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
- psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri,
- psw->eaba);
- decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n",
- gpregs[0], gpregs[1], gpregs[2], gpregs[3]);
- decompressor_printk(" %016lx %016lx %016lx %016lx\n",
- gpregs[4], gpregs[5], gpregs[6], gpregs[7]);
- decompressor_printk(" %016lx %016lx %016lx %016lx\n",
- gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
- decompressor_printk(" %016lx %016lx %016lx %016lx\n",
- gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
- print_stacktrace(S390_lowcore.gpregs_save_area[15]);
- decompressor_printk("Last Breaking-Event-Address:\n");
- decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
- (void *)S390_lowcore.pgm_last_break);
-}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index 0cf79826eef9..45e3d057cfaa 100644
--- a/arch/s390/boot/physmem_info.c
+++ b/arch/s390/boot/physmem_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "physmem: " fmt
#include <linux/processor.h>
#include <linux/errno.h>
#include <linux/init.h>
@@ -9,6 +10,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sclp.h>
+#include <asm/asm.h>
#include <asm/uv.h>
#include "decompressor.h"
#include "boot.h"
@@ -27,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
return &physmem_info.online[n];
if (unlikely(!physmem_info.online_extended)) {
physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
- RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+ RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
physmem_alloc_pos, true);
}
return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
@@ -57,37 +59,25 @@ void add_physmem_online_range(u64 start, u64 end)
static int __diag260(unsigned long rx1, unsigned long rx2)
{
- unsigned long reg1, reg2, ry;
union register_pair rx;
- psw_t old;
- int rc;
+ int cc, exception;
+ unsigned long ry;
rx.even = rx1;
rx.odd = rx2;
ry = 0x10; /* storage configuration */
- rc = -1; /* fail */
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ exception = 1;
+ asm_inline volatile(
" diag %[rx],%[ry],0x260\n"
- " ipm %[rc]\n"
- " srl %[rc],28\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- [ry] "+&d" (ry),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [rx] "d" (rx.pair),
- [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
- : "cc", "memory");
- return rc == 0 ? ry : -1;
+ "0: lhi %[exc],0\n"
+ "1:\n"
+ CC_IPM(cc)
+ EX_TABLE(0b, 1b)
+ : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry)
+ : [rx] "d" (rx.pair)
+ : CC_CLOBBER_LIST("memory"));
+ cc = exception ? -1 : CC_TRANSFORM(cc);
+ return cc == 0 ? ry : -1;
}
static int diag260(void)
@@ -109,33 +99,44 @@ static int diag260(void)
return 0;
}
+#define DIAG500_SC_STOR_LIMIT 4
+
+static int diag500_storage_limit(unsigned long *max_physmem_end)
+{
+ unsigned long storage_limit;
+
+ asm_inline volatile(
+ " lghi %%r1,%[subcode]\n"
+ " lghi %%r2,0\n"
+ " diag %%r2,%%r4,0x500\n"
+ "0: lgr %[slimit],%%r2\n"
+ EX_TABLE(0b, 0b)
+ : [slimit] "=d" (storage_limit)
+ : [subcode] "i" (DIAG500_SC_STOR_LIMIT)
+ : "memory", "1", "2");
+ if (!storage_limit)
+ return -EINVAL;
+ /* Convert inclusive end to exclusive end */
+ *max_physmem_end = storage_limit + 1;
+ return 0;
+}
+
static int tprot(unsigned long addr)
{
- unsigned long reg1, reg2;
- int rc = -EFAULT;
- psw_t old;
-
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ int cc, exception;
+
+ exception = 1;
+ asm_inline volatile(
" tprot 0(%[addr]),0\n"
- " ipm %[rc]\n"
- " srl %[rc],28\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- "=Q" (S390_lowcore.program_new_psw.addr),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
- [addr] "a" (addr)
- : "cc", "memory");
- return rc;
+ "0: lhi %[exc],0\n"
+ "1:\n"
+ CC_IPM(cc)
+ EX_TABLE(0b, 1b)
+ : CC_OUT(cc, cc), [exc] "+d" (exception)
+ : [addr] "a" (addr)
+ : CC_CLOBBER_LIST("memory"));
+ cc = exception ? -EFAULT : CC_TRANSFORM(cc);
+ return cc;
}
static unsigned long search_mem_end(void)
@@ -157,30 +158,48 @@ unsigned long detect_max_physmem_end(void)
{
unsigned long max_physmem_end = 0;
- if (!sclp_early_get_memsize(&max_physmem_end)) {
+ if (!diag500_storage_limit(&max_physmem_end)) {
+ physmem_info.info_source = MEM_DETECT_DIAG500_STOR_LIMIT;
+ } else if (!sclp_early_get_memsize(&max_physmem_end)) {
physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
} else {
max_physmem_end = search_mem_end();
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
}
+ boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end,
+ get_physmem_info_source());
return max_physmem_end;
}
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
+ unsigned long start, end;
+ int i;
+
if (!sclp_early_read_storage_info()) {
physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
+ } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) {
+ unsigned long online_end;
+
+ if (!sclp_early_get_memsize(&online_end)) {
+ physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+ add_physmem_online_range(0, online_end);
+ }
} else if (!diag260()) {
physmem_info.info_source = MEM_DETECT_DIAG260;
} else if (max_physmem_end) {
add_physmem_online_range(0, max_physmem_end);
}
+ boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source());
+ for_each_physmem_online_range(i, &start, &end)
+ boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end);
}
void physmem_set_usable_limit(unsigned long limit)
{
physmem_info.usable = limit;
physmem_alloc_pos = limit;
+ boot_debug("Usable memory limit: 0x%016lx\n", limit);
}
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
@@ -190,38 +209,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min,
enum reserved_range_type t;
int i;
- decompressor_printk("Linux version %s\n", kernel_version);
+ boot_emerg("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
- decompressor_printk("Kernel command line: %s\n", early_command_line);
- decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
- size, align, min, max);
- decompressor_printk("Reserved memory ranges:\n");
+ boot_emerg("Kernel command line: %s\n", early_command_line);
+ boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n",
+ size, align, min, max);
+ boot_emerg("Reserved memory ranges:\n");
for_each_physmem_reserved_range(t, range, &start, &end) {
- decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+ boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
total_reserved_mem += end - start;
}
- decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
- get_physmem_info_source(), physmem_info.info_source);
+ boot_emerg("Usable online memory ranges (info source: %s [%d]):\n",
+ get_physmem_info_source(), physmem_info.info_source);
for_each_physmem_usable_range(i, &start, &end) {
- decompressor_printk("%016lx %016lx\n", start, end);
+ boot_emerg("%016lx %016lx\n", start, end);
total_mem += end - start;
}
- decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
- total_mem, total_reserved_mem,
- total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+ boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n",
+ total_mem, total_reserved_mem,
+ total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
print_stacktrace(current_frame_address());
- sclp_early_printk("\n\n -- System halted\n");
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
-void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
physmem_info.reserved[type].start = addr;
physmem_info.reserved[type].end = addr + size;
}
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size,
+ get_rr_type_name(type));
+}
+
void physmem_free(enum reserved_range_type type)
{
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start,
+ physmem_info.reserved[type].end, get_rr_type_name(type));
physmem_info.reserved[type].start = 0;
physmem_info.reserved[type].end = 0;
}
@@ -288,41 +316,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s
max = min(max, physmem_alloc_pos);
addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
if (addr)
- physmem_reserve(type, addr, size);
+ _physmem_reserve(type, addr, size);
+ boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size,
+ get_rr_type_name(type));
return addr;
}
-unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
- unsigned long align)
+unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size,
+ unsigned long align, bool die_on_oom)
{
struct reserved_range *range = &physmem_info.reserved[type];
- struct reserved_range *new_range;
+ struct reserved_range *new_range = NULL;
unsigned int ranges_left;
unsigned long addr;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
- &ranges_left, true);
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
/* if not a consecutive allocation of the same type or first allocation */
if (range->start != addr + size) {
if (range->end) {
- physmem_alloc_pos = __physmem_alloc_range(
- sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
- physmem_alloc_ranges, &ranges_left, true);
- new_range = (struct reserved_range *)physmem_alloc_pos;
+ addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0,
+ physmem_alloc_pos, physmem_alloc_ranges,
+ &ranges_left, true);
+ new_range = (struct reserved_range *)addr;
+ addr = __physmem_alloc_range(size, align, 0, addr, ranges_left,
+ &ranges_left, die_on_oom);
+ if (!addr)
+ return 0;
*new_range = *range;
range->chain = new_range;
- addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
- ranges_left, &ranges_left, true);
}
range->end = addr + size;
}
+ if (type != RR_VMEM) {
+ boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:",
+ addr, addr + size, get_rr_type_name(type), align, !!new_range);
+ }
range->start = addr;
physmem_alloc_pos = addr;
physmem_alloc_ranges = ranges_left;
return addr;
}
+unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size,
+ unsigned long align)
+{
+ return physmem_alloc(type, size, align, true);
+}
+
unsigned long get_physmem_alloc_pos(void)
{
return physmem_alloc_pos;
}
+
+void dump_physmem_reserved(void)
+{
+ struct reserved_range *range;
+ enum reserved_range_type t;
+ unsigned long start, end;
+
+ boot_debug("Reserved memory ranges:\n");
+ for_each_physmem_reserved_range(t, range, &start, &end) {
+ if (end) {
+ boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n",
+ get_rr_type_name(t), start, end, (unsigned long)range,
+ (unsigned long)range->chain);
+ }
+ }
+}
diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c
new file mode 100644
index 000000000000..4bb6bc95704e
--- /dev/null
+++ b/arch/s390/boot/printk.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/stdarg.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
+#include <asm/sections.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/timex.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
+#include "boot.h"
+
+int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT;
+bool boot_ignore_loglevel;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+static void boot_rb_add(const char *str, size_t len)
+{
+ /* leave double '\0' in the end */
+ size_t avail = sizeof(boot_rb) - boot_rb_off - 1;
+
+ /* store strings separated by '\0' */
+ if (len + 1 > avail)
+ boot_rb_off = 0;
+ avail = sizeof(boot_rb) - boot_rb_off - 1;
+ strscpy(boot_rb + boot_rb_off, str, avail);
+ boot_rb_off += len + 1;
+}
+
+static void print_rb_entry(const char *str)
+{
+ sclp_early_printk(printk_skip_level(str));
+}
+
+static bool debug_messages_printed(void)
+{
+ return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG);
+}
+
+void boot_rb_dump(void)
+{
+ if (debug_messages_printed())
+ return;
+ sclp_early_printk("Boot messages ring buffer:\n");
+ boot_rb_foreach(print_rb_entry);
+}
+
+const char hex_asc[] = "0123456789abcdef";
+
+static char *as_hex(char *dst, unsigned long val, int pad)
+{
+ char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1);
+
+ for (*p-- = '\0'; p >= dst; val >>= 4)
+ *p-- = hex_asc[val & 0x0f];
+ return dst;
+}
+
+#define MAX_NUMLEN 21
+static char *as_dec(char *buf, unsigned long val, bool is_signed)
+{
+ bool negative = false;
+ char *p = buf + MAX_NUMLEN;
+
+ if (is_signed && (long)val < 0) {
+ val = (val == LONG_MIN ? LONG_MIN : -(long)val);
+ negative = true;
+ }
+
+ *--p = '\0';
+ do {
+ *--p = '0' + (val % 10);
+ val /= 10;
+ } while (val);
+
+ if (negative)
+ *--p = '-';
+ return p;
+}
+
+static ssize_t strpad(char *dst, size_t dst_size, const char *src,
+ int _pad, bool zero_pad, bool decimal)
+{
+ ssize_t len = strlen(src), pad = _pad;
+ char *p = dst;
+
+ if (max(len, abs(pad)) >= dst_size)
+ return -E2BIG;
+
+ if (pad > len) {
+ if (decimal && zero_pad && *src == '-') {
+ *p++ = '-';
+ src++;
+ len--;
+ pad--;
+ }
+ memset(p, zero_pad ? '0' : ' ', pad - len);
+ p += pad - len;
+ }
+ memcpy(p, src, len);
+ p += len;
+ if (pad < 0 && -pad > len) {
+ memset(p, ' ', -pad - len);
+ p += -pad - len;
+ }
+ *p = '\0';
+ return p - dst;
+}
+
+static char *symstart(char *p)
+{
+ while (*p)
+ p--;
+ return p + 1;
+}
+
+static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len)
+{
+ /* symbol entries are in a form "10000 c4 startup\0" */
+ char *a = _decompressor_syms_start;
+ char *b = _decompressor_syms_end;
+ unsigned long start;
+ unsigned long size;
+ char *pivot;
+ char *endp;
+
+ while (a < b) {
+ pivot = symstart(a + (b - a) / 2);
+ start = simple_strtoull(pivot, &endp, 16);
+ size = simple_strtoull(endp + 1, &endp, 16);
+ if (ip < start) {
+ b = pivot;
+ continue;
+ }
+ if (ip > start + size) {
+ a = pivot + strlen(pivot) + 1;
+ continue;
+ }
+ *off = ip - start;
+ *len = size;
+ return endp + 1;
+ }
+ return NULL;
+}
+
+#define MAX_SYMLEN 64
+static noinline char *strsym(char *buf, void *ip)
+{
+ unsigned short off;
+ unsigned short len;
+ char *p;
+
+ p = findsym((unsigned long)ip, &off, &len);
+ if (p) {
+ strscpy(buf, p, MAX_SYMLEN);
+ /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */
+ p = buf + strnlen(buf, MAX_SYMLEN - 15);
+ strscpy(p, "+0x", MAX_SYMLEN - (p - buf));
+ as_hex(p + 3, off, 0);
+ strcat(p, "/0x");
+ as_hex(p + strlen(p), len, 0);
+ } else {
+ as_hex(buf, (unsigned long)ip, 16);
+ }
+ return buf;
+}
+
+static inline int printk_loglevel(const char *buf)
+{
+ if (buf[0] == KERN_SOH_ASCII && buf[1]) {
+ switch (buf[1]) {
+ case '0' ... '7':
+ return buf[1] - '0';
+ }
+ }
+ return MESSAGE_LOGLEVEL_DEFAULT;
+}
+
+static void boot_console_earlyprintk(const char *buf)
+{
+ int level = printk_loglevel(buf);
+
+ /* always print emergency messages */
+ if (level > LOGLEVEL_EMERG && !boot_earlyprintk)
+ return;
+ buf = printk_skip_level(buf);
+ /* print debug messages only when bootdebug is enabled */
+ if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf))))
+ return;
+ if (boot_ignore_loglevel || level < boot_console_loglevel)
+ sclp_early_printk(buf);
+}
+
+static char *add_timestamp(char *buf)
+{
+#ifdef CONFIG_PRINTK_TIME
+ unsigned long ns = tod_to_ns(__get_tod_clock_monotonic());
+ char ts[MAX_NUMLEN];
+
+ *buf++ = '[';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0);
+ *buf++ = '.';
+ buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0);
+ *buf++ = ']';
+ *buf++ = ' ';
+#endif
+ return buf;
+}
+
+#define va_arg_len_type(args, lenmod, typemod) \
+ ((lenmod == 'l') ? va_arg(args, typemod long) : \
+ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \
+ (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \
+ (lenmod == 'z') ? va_arg(args, typemod long) : \
+ va_arg(args, typemod int))
+
+int boot_printk(const char *fmt, ...)
+{
+ char buf[1024] = { 0 };
+ char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */
+ bool zero_pad, decimal;
+ char *strval, *p = buf;
+ char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)];
+ va_list args;
+ char lenmod;
+ ssize_t len;
+ int pad;
+
+ *p++ = KERN_SOH_ASCII;
+ *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT;
+ p = add_timestamp(p);
+ fmt = printk_skip_level(fmt);
+
+ va_start(args, fmt);
+ for (; p < end && *fmt; fmt++) {
+ if (*fmt != '%') {
+ *p++ = *fmt;
+ continue;
+ }
+ if (*++fmt == '%') {
+ *p++ = '%';
+ continue;
+ }
+ zero_pad = (*fmt == '0');
+ pad = simple_strtol(fmt, (char **)&fmt, 10);
+ lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0;
+ if (lenmod == 'h' && *fmt == 'h') {
+ lenmod = 'H';
+ fmt++;
+ }
+ decimal = false;
+ switch (*fmt) {
+ case 's':
+ if (lenmod)
+ goto out;
+ strval = va_arg(args, char *);
+ zero_pad = false;
+ break;
+ case 'p':
+ if (*++fmt != 'S' || lenmod)
+ goto out;
+ strval = strsym(valbuf, va_arg(args, void *));
+ zero_pad = false;
+ break;
+ case 'd':
+ case 'i':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1);
+ decimal = true;
+ break;
+ case 'u':
+ strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
+ break;
+ case 'x':
+ strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0);
+ break;
+ default:
+ goto out;
+ }
+ len = strpad(p, end - p, strval, pad, zero_pad, decimal);
+ if (len == -E2BIG)
+ break;
+ p += len;
+ }
+out:
+ va_end(args);
+ len = strlen(buf);
+ if (len) {
+ boot_rb_add(buf, len);
+ boot_console_earlyprintk(buf);
+ }
+ return len;
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 6cf89314209a..da8337e63a3e 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,12 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "startup: " fmt
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/page-states.h>
#include <asm/boot_data.h>
+#include <asm/extmem.h>
#include <asm/sections.h>
+#include <asm/diag288.h>
#include <asm/maccess.h>
+#include <asm/machine.h>
+#include <asm/sysinfo.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
+#include <asm/timex.h>
#include <asm/kasan.h>
#include <asm/kexec.h>
#include <asm/sclp.h>
@@ -18,7 +24,7 @@
#include "boot.h"
#include "uv.h"
-unsigned long __bootdata_preserved(__kaslr_offset);
+struct vm_layout __bootdata_preserved(vm_layout);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area);
pte_t *__bootdata_preserved(memcpy_real_ptep);
@@ -29,61 +35,131 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
-unsigned long __bootdata(ident_map_size);
+unsigned long __bootdata_preserved(page_noexec_mask);
+unsigned long __bootdata_preserved(segment_noexec_mask);
+unsigned long __bootdata_preserved(region_noexec_mask);
+union tod_clock __bootdata_preserved(tod_clock_base);
+u64 __bootdata_preserved(clock_comparator_max) = -1UL;
u64 __bootdata_preserved(stfle_fac_list[16]);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
-struct machine_info machine;
-
void error(char *x)
{
- sclp_early_printk("\n\n");
- sclp_early_printk(x);
- sclp_early_printk("\n\n -- System halted");
-
+ boot_emerg("%s\n", x);
+ boot_emerg(" -- System halted\n");
disabled_wait();
}
+static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static void detect_machine_type(void)
+{
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
+ /* Check current-configuration-level */
+ if (stsi(NULL, 0, 0, 0) <= 2) {
+ set_machine_feature(MFEATURE_LPAR);
+ return;
+ }
+ /* Get virtual-machine cpu information. */
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
+ return;
+ /* Detect known hypervisors */
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+ set_machine_feature(MFEATURE_KVM);
+ else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
+ set_machine_feature(MFEATURE_VM);
+}
+
+static void detect_diag288(void)
+{
+ /* "BEGIN" in EBCDIC character set */
+ static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5";
+ unsigned long action, len;
+
+ action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART;
+ len = machine_is_vm() ? sizeof(cmd) : 0;
+ if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len))
+ return;
+ __diag288(WDT_FUNC_CANCEL, 0, 0, 0);
+ set_machine_feature(MFEATURE_DIAG288);
+}
+
+static void detect_diag9c(void)
+{
+ unsigned int cpu;
+ int rc = 1;
+
+ cpu = stap();
+ asm_inline volatile(
+ " diag %[cpu],%%r0,0x9c\n"
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc)
+ : [cpu] "d" (cpu)
+ : "cc", "memory");
+ if (!rc)
+ set_machine_feature(MFEATURE_DIAG9C);
+}
+
+static void reset_tod_clock(void)
+{
+ union tod_clock clk;
+
+ if (store_tod_clock_ext_cc(&clk) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
+ disabled_wait();
+ memset(&tod_clock_base, 0, sizeof(tod_clock_base));
+ tod_clock_base.tod = TOD_UNIX_EPOCH;
+ get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
+}
+
static void detect_facilities(void)
{
- if (test_facility(8)) {
- machine.has_edat1 = 1;
+ if (cpu_has_edat1())
local_ctl_set_bit(0, CR0_EDAT_BIT);
+ page_noexec_mask = -1UL;
+ segment_noexec_mask = -1UL;
+ region_noexec_mask = -1UL;
+ if (!cpu_has_nx()) {
+ page_noexec_mask &= ~_PAGE_NOEXEC;
+ segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC;
+ region_noexec_mask &= ~_REGION_ENTRY_NOEXEC;
+ }
+ if (IS_ENABLED(CONFIG_PCI) && test_facility(153))
+ set_machine_feature(MFEATURE_PCI_MIO);
+ reset_tod_clock();
+ if (test_facility(139) && (tod_clock_base.tod >> 63)) {
+ /* Enable signed clock comparator comparisons */
+ set_machine_feature(MFEATURE_SCC);
+ clock_comparator_max = -1UL >> 1;
+ local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
}
- if (test_facility(78))
- machine.has_edat2 = 1;
- if (test_facility(130))
- machine.has_nx = 1;
+ if (test_facility(50) && test_facility(73)) {
+ set_machine_feature(MFEATURE_TX);
+ local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
+ }
+ if (cpu_has_vx())
+ local_ctl_set_bit(0, CR0_VECTOR_BIT);
}
static int cmma_test_essa(void)
{
- unsigned long reg1, reg2, tmp = 0;
+ unsigned long tmp = 0;
int rc = 1;
- psw_t old;
/* Test ESSA_GET_STATE */
- asm volatile(
- " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
- " epsw %[reg1],%[reg2]\n"
- " st %[reg1],0(%[psw_pgm])\n"
- " st %[reg2],4(%[psw_pgm])\n"
- " larl %[reg1],1f\n"
- " stg %[reg1],8(%[psw_pgm])\n"
+ asm_inline volatile(
" .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
- " la %[rc],0\n"
- "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
- : [reg1] "=&d" (reg1),
- [reg2] "=&a" (reg2),
- [rc] "+&d" (rc),
- [tmp] "=&d" (tmp),
- "+Q" (S390_lowcore.program_new_psw),
- "=Q" (old)
- : [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
- [cmd] "i" (ESSA_GET_STATE)
+ "0: lhi %[rc],0\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [rc] "+d" (rc), [tmp] "+d" (tmp)
+ : [cmd] "i" (ESSA_GET_STATE)
: "cc", "memory");
return rc;
}
@@ -102,16 +178,26 @@ static void cmma_init(void)
static void setup_lpp(void)
{
- S390_lowcore.current_pid = 0;
- S390_lowcore.lpp = LPP_MAGIC;
+ get_lowcore()->current_pid = 0;
+ get_lowcore()->lpp = LPP_MAGIC;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
#ifdef CONFIG_KERNEL_UNCOMPRESSED
-unsigned long mem_safe_offset(void)
+static unsigned long mem_safe_offset(void)
{
- return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+ return (unsigned long)_compressed_start;
+}
+
+static void deploy_kernel(void *output)
+{
+ void *uncompressed_start = (void *)_compressed_start;
+
+ if (output == uncompressed_start)
+ return;
+ memmove(output, uncompressed_start, vmlinux.image_size);
+ memset(uncompressed_start, 0, vmlinux.image_size);
}
#endif
@@ -127,7 +213,7 @@ static void rescue_initrd(unsigned long min, unsigned long max)
return;
old_addr = addr;
physmem_free(RR_INITRD);
- addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+ addr = physmem_alloc_or_die(RR_INITRD, size, 0);
memmove((void *)addr, (void *)old_addr, size);
}
@@ -141,67 +227,15 @@ static void copy_bootdata(void)
memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}
-#ifdef CONFIG_PIE_BUILD
-static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
-{
- Elf64_Rela *rela_start, *rela_end, *rela;
- int r_type, r_sym, rc;
- Elf64_Addr loc, val;
- Elf64_Sym *dynsym;
-
- rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
- rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
- dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
- for (rela = rela_start; rela < rela_end; rela++) {
- loc = rela->r_offset + offset;
- val = rela->r_addend;
- r_sym = ELF64_R_SYM(rela->r_info);
- if (r_sym) {
- if (dynsym[r_sym].st_shndx != SHN_UNDEF)
- val += dynsym[r_sym].st_value + offset;
- } else {
- /*
- * 0 == undefined symbol table index (STN_UNDEF),
- * used for R_390_RELATIVE, only add KASLR offset
- */
- val += offset;
- }
- r_type = ELF64_R_TYPE(rela->r_info);
- rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
- if (rc)
- error("Unknown relocation type");
- }
-}
-
-static void kaslr_adjust_got(unsigned long offset) {}
-static void rescue_relocs(void) {}
-static void free_relocs(void) {}
-#else
-static int *vmlinux_relocs_64_start;
-static int *vmlinux_relocs_64_end;
-
-static void rescue_relocs(void)
-{
- unsigned long size = __vmlinux_relocs_64_end - __vmlinux_relocs_64_start;
-
- vmlinux_relocs_64_start = (void *)physmem_alloc_top_down(RR_RELOC, size, 0);
- vmlinux_relocs_64_end = (void *)vmlinux_relocs_64_start + size;
- memmove(vmlinux_relocs_64_start, __vmlinux_relocs_64_start, size);
-}
-
-static void free_relocs(void)
-{
- physmem_free(RR_RELOC);
-}
-
-static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
+static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
+ unsigned long offset, unsigned long phys_offset)
{
int *reloc;
long loc;
/* Adjust R_390_64 relocations */
- for (reloc = vmlinux_relocs_64_start; reloc < vmlinux_relocs_64_end; reloc++) {
- loc = (long)*reloc + offset;
+ for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) {
+ loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr)
error("64-bit relocation outside of kernel!\n");
*(u64 *)loc += offset;
@@ -213,24 +247,29 @@ static void kaslr_adjust_got(unsigned long offset)
u64 *entry;
/*
- * Even without -fPIE, Clang still uses a global offset table for some
- * reason. Adjust the GOT entries.
+ * Adjust GOT entries, except for ones for undefined weak symbols
+ * that resolved to zero. This also skips the first three reserved
+ * entries on s390x that are zero.
*/
- for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
- *entry += offset;
+ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+ if (*entry)
+ *entry += offset;
+ }
}
-#endif
/*
* Merge information from several sources into a single ident_map_size value.
* "ident_map_size" represents the upper limit of physical memory we may ever
* reach. It might not be all online memory, but also include standby (offline)
- * memory. "ident_map_size" could be lower then actual standby or even online
+ * memory or memory areas reserved for other means (e.g., memory devices such as
+ * virtio-mem).
+ *
+ * "ident_map_size" could be lower then actual standby/reserved or even online
* memory present, due to limiting factors. We should never go above this limit.
* It is the size of our identity mapping.
*
* Consider the following factors:
- * 1. max_physmem_end - end of physical memory online or standby.
+ * 1. max_physmem_end - end of physical memory online, standby or reserved.
* Always >= end of the last online memory range (get_physmem_online_end()).
* 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
* kernel is able to support.
@@ -253,17 +292,40 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
if (oldmem_data.start) {
__kaslr_enabled = 0;
ident_map_size = min(ident_map_size, oldmem_data.size);
+ boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size);
} else if (ipl_block_valid && is_ipl_block_dump()) {
__kaslr_enabled = 0;
- if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
+ if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) {
ident_map_size = min(ident_map_size, hsa_size);
+ boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size);
+ }
}
#endif
+ boot_debug("Identity map size: 0x%016lx\n", ident_map_size);
}
-static unsigned long setup_kernel_memory_layout(void)
+#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
+
+static unsigned long get_vmem_size(unsigned long identity_size,
+ unsigned long vmemmap_size,
+ unsigned long vmalloc_size,
+ unsigned long rte_size)
+{
+ unsigned long max_mappable, vsize;
+
+ max_mappable = max(identity_size, MAX_DCSS_ADDR);
+ vsize = round_up(SZ_2G + max_mappable, rte_size) +
+ round_up(vmemmap_size, rte_size) +
+ FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ vsize += MODULES_LEN * 2;
+ return size_add(vsize, vmalloc_size);
+}
+
+static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
{
unsigned long vmemmap_start;
+ unsigned long kernel_start;
unsigned long asce_limit;
unsigned long rte_size;
unsigned long pages;
@@ -275,12 +337,20 @@ static unsigned long setup_kernel_memory_layout(void)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
- vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size +
- MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE;
- vsize = size_add(vsize, vmalloc_size);
- if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) {
+ BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
+ BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
+ vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+ boot_debug("vmem size estimated: 0x%016lx\n", vsize);
+ if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
+ (vsize > _REGION2_SIZE && kaslr_enabled())) {
asce_limit = _REGION1_SIZE;
- rte_size = _REGION2_SIZE;
+ if (__NO_KASLR_END_KERNEL > _REGION2_SIZE) {
+ rte_size = _REGION2_SIZE;
+ vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION2_SIZE);
+ } else {
+ rte_size = _REGION3_SIZE;
+ }
} else {
asce_limit = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
@@ -290,38 +360,93 @@ static unsigned long setup_kernel_memory_layout(void)
* Forcing modules and vmalloc area under the ultravisor
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
+ *
+ * Assume the secure storage limit always exceeds _REGION2_SIZE,
+ * otherwise asce_limit and rte_size would have been adjusted.
*/
vmax = adjust_to_uv_max(asce_limit);
+ boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax);
#ifdef CONFIG_KASAN
+ BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
+ boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END);
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
- __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE);
- __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
- sizeof(struct lowcore));
- MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
+ vsize = min(vsize, vmax);
+ if (kaslr_enabled()) {
+ unsigned long kernel_end, kaslr_len, slots, pos;
+
+ kaslr_len = max(KASLR_LEN, vmax - vsize);
+ slots = DIV_ROUND_UP(kaslr_len - kernel_size, THREAD_SIZE);
+ if (get_random(slots, &pos))
+ pos = 0;
+ kernel_end = vmax - pos * THREAD_SIZE;
+ kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+ boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start,
+ kernel_size + kernel_size);
+ } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
+ kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
+ boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start,
+ kernel_start + kernel_size);
+ } else {
+ kernel_start = __NO_KASLR_START_KERNEL;
+ boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start,
+ kernel_start + kernel_size);
+ }
+ __kaslr_offset = kernel_start;
+ boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset);
+
+ MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ VMALLOC_END -= MODULES_LEN * 2;
+ boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END);
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
- vsize = round_down(VMALLOC_END / 2, _SEGMENT_SIZE);
+ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
+ vsize = round_down(vsize, _SEGMENT_SIZE);
vmalloc_size = min(vmalloc_size, vsize);
+ if (IS_ENABLED(CONFIG_KMSAN)) {
+ /* take 2/3 of vmalloc area for KMSAN shadow and origins */
+ vmalloc_size = round_down(vmalloc_size / 3, _SEGMENT_SIZE);
+ VMALLOC_END -= vmalloc_size * 2;
+ }
VMALLOC_START = VMALLOC_END - vmalloc_size;
+ boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END);
+
+ __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+ boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area,
+ __memcpy_real_area + MEMCPY_REAL_SIZE);
+ __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+ sizeof(struct lowcore));
+ boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore,
+ __abs_lowcore + ABS_LOWCORE_MAP_SIZE);
/* split remaining virtual space between 1:1 mapping & vmemmap array */
- pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
pages = SECTION_ALIGN_UP(pages);
/* keep vmemmap_start aligned to a top level region table entry */
- vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
- vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
- /* maximum mappable address as seen by arch_get_mappable_range() */
- max_mappable = vmemmap_start;
+ vmemmap_start = round_down(__abs_lowcore - pages * sizeof(struct page), rte_size);
/* make sure identity map doesn't overlay with vmemmap */
ident_map_size = min(ident_map_size, vmemmap_start);
vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
- /* make sure vmemmap doesn't overlay with vmalloc area */
- VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
+ /* make sure vmemmap doesn't overlay with absolute lowcore area */
+ if (vmemmap_start + vmemmap_size > __abs_lowcore) {
+ vmemmap_size = SECTION_ALIGN_DOWN(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+ ident_map_size = vmemmap_size / sizeof(struct page) * PAGE_SIZE;
+ }
vmemmap = (struct page *)vmemmap_start;
+ /* maximum address for which linear mapping could be created (DCSS, memory) */
+ BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
+ max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
+ max_mappable = min(max_mappable, vmemmap_start);
+#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+#endif
+ boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base,
+ __identity_base + ident_map_size);
return asce_limit;
}
@@ -329,9 +454,9 @@ static unsigned long setup_kernel_memory_layout(void)
/*
* This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
*/
-static void clear_bss_section(unsigned long vmlinux_lma)
+static void clear_bss_section(unsigned long kernel_start)
{
- memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size);
+ memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size);
}
/*
@@ -348,22 +473,17 @@ static void setup_vmalloc_size(void)
vmalloc_size = max(size, vmalloc_size);
}
-static void kaslr_adjust_vmlinux_info(unsigned long offset)
+static void kaslr_adjust_vmlinux_info(long offset)
{
- *(unsigned long *)(&vmlinux.entry) += offset;
vmlinux.bootdata_off += offset;
vmlinux.bootdata_preserved_off += offset;
-#ifdef CONFIG_PIE_BUILD
- vmlinux.rela_dyn_start += offset;
- vmlinux.rela_dyn_end += offset;
- vmlinux.dynsym_start += offset;
-#else
vmlinux.got_start += offset;
vmlinux.got_end += offset;
-#endif
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
+ vmlinux.alt_instructions += offset;
+ vmlinux.alt_instructions_end += offset;
#ifdef CONFIG_KASAN
vmlinux.kasan_early_shadow_page_off += offset;
vmlinux.kasan_early_shadow_pte_off += offset;
@@ -375,21 +495,32 @@ static void kaslr_adjust_vmlinux_info(unsigned long offset)
void startup_kernel(void)
{
+ unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
+ unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0;
+ unsigned long kernel_size = TEXT_OFFSET + vmlinux_size;
+ unsigned long kaslr_large_page_offset;
unsigned long max_physmem_end;
- unsigned long vmlinux_lma = 0;
- unsigned long amode31_lma = 0;
unsigned long asce_limit;
unsigned long safe_addr;
- void *img;
psw_t psw;
setup_lpp();
- safe_addr = mem_safe_offset();
+ store_ipl_parmblock();
+ uv_query_info();
+ setup_boot_command_line();
+ parse_boot_command_line();
/*
- * Reserve decompressor memory together with decompression heap, buffer and
- * memory which might be occupied by uncompressed kernel at default 1Mb
- * position (if KASLR is off or failed).
+ * Non-randomized kernel physical start address must be _SEGMENT_SIZE
+ * aligned (see blow).
+ */
+ nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
+ safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size);
+
+ /*
+ * Reserve decompressor memory together with decompression heap,
+ * buffer and memory which might be occupied by uncompressed kernel
+ * (if KASLR is off or failed).
*/
physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
@@ -397,52 +528,82 @@ void startup_kernel(void)
oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size;
- store_ipl_parmblock();
read_ipl_report();
- uv_query_info();
sclp_early_read_info();
- setup_boot_command_line();
- parse_boot_command_line();
+ sclp_early_detect_machine_features();
detect_facilities();
+ detect_diag9c();
+ detect_machine_type();
+ /* detect_diag288() needs machine type */
+ detect_diag288();
cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end);
setup_vmalloc_size();
- asce_limit = setup_kernel_memory_layout();
+ asce_limit = setup_kernel_memory_layout(kernel_size);
/* got final ident_map_size, physmem allocations could be performed now */
physmem_set_usable_limit(ident_map_size);
detect_physmem_online_ranges(max_physmem_end);
save_ipl_cert_comp_list();
rescue_initrd(safe_addr, ident_map_size);
- rescue_relocs();
+ /*
+ * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower
+ * 20 bits (the offset within a large page) are zero. Copy the last
+ * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to
+ * __kaslr_offset_phys.
+ *
+ * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset
+ * are identical, which is required to allow for large mappings of the
+ * kernel image.
+ */
+ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
if (kaslr_enabled()) {
- vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size,
- THREAD_SIZE, vmlinux.default_lma,
- ident_map_size);
- if (vmlinux_lma) {
- __kaslr_offset = vmlinux_lma - vmlinux.default_lma;
- kaslr_adjust_vmlinux_info(__kaslr_offset);
- }
- }
- vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
- physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size);
-
- if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
- img = decompress_kernel();
- memmove((void *)vmlinux_lma, img, vmlinux.image_size);
- } else if (__kaslr_offset) {
- img = (void *)vmlinux.default_lma;
- memmove((void *)vmlinux_lma, img, vmlinux.image_size);
- memset(img, 0, vmlinux.image_size);
+ unsigned long size = vmlinux_size + kaslr_large_page_offset;
+
+ text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size);
}
+ if (!text_lma)
+ text_lma = nokaslr_text_lma;
+ text_lma |= kaslr_large_page_offset;
+
+ /*
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is
+ * never accessed via the kernel image mapping as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, this region could be used for something else and does
+ * not need to be reserved. See how it is skipped in setup_vmem().
+ */
+ __kaslr_offset_phys = text_lma - TEXT_OFFSET;
+ kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
+ physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size);
+ deploy_kernel((void *)text_lma);
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
- if (kaslr_enabled())
- amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
- amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size;
+
+ /*
+ * In case KASLR is enabled the randomized location of .amode31
+ * section might overlap with .vmlinux.relocs section. To avoid that
+ * the below randomize_within_range() could have been called with
+ * __vmlinux_relocs_64_end as the lower range address. However,
+ * .amode31 section is written to by the decompressed kernel - at
+ * that time the contents of .vmlinux.relocs is not needed anymore.
+ * Conversely, .vmlinux.relocs is read only by the decompressor, even
+ * before the kernel started. Therefore, in case the two sections
+ * overlap there is no risk of corrupting any data.
+ */
+ if (kaslr_enabled()) {
+ unsigned long amode31_min;
+
+ amode31_min = (unsigned long)_decompressor_end;
+ amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
+ }
+ if (!amode31_lma)
+ amode31_lma = text_lma - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
@@ -458,23 +619,28 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem()
*/
- clear_bss_section(vmlinux_lma);
- kaslr_adjust_relocs(vmlinux_lma, vmlinux_lma + vmlinux.image_size, __kaslr_offset);
+ clear_bss_section(text_lma);
+ kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size,
+ __kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
- free_relocs();
- setup_vmem(asce_limit);
+ setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
+ dump_physmem_reserved();
copy_bootdata();
+ __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
+ (struct alt_instr *)_vmlinux_info.alt_instructions_end,
+ ALT_CTX_EARLY);
/*
* Save KASLR offset for early dumps, before vmcore_info is set.
* Mark as uneven to distinguish from real vmcore_info pointer.
*/
- S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0;
+ get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
/*
* Jump to the decompressed kernel entry point and switch DAT mode on.
*/
- psw.addr = vmlinux.entry;
+ psw.addr = __kaslr_offset + vmlinux.entry;
psw.mask = PSW_KERNEL_BITS;
+ boot_debug("Starting kernel at: 0x%016lx\n", psw.addr);
__load_psw(psw);
}
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index faccb33b462c..bd68161434a6 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -1,11 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#define IN_BOOT_STRING_C 1
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#undef CONFIG_KASAN
#undef CONFIG_KASAN_GENERIC
+#undef CONFIG_KMSAN
#include "../lib/string.c"
+/*
+ * Duplicate some functions from the common lib/string.c
+ * instead of fully including it.
+ */
+
int strncmp(const char *cs, const char *ct, size_t count)
{
unsigned char c1, c2;
@@ -22,6 +29,27 @@ int strncmp(const char *cs, const char *ct, size_t count)
return 0;
}
+ssize_t sized_strscpy(char *dst, const char *src, size_t count)
+{
+ size_t len;
+
+ if (count == 0)
+ return -E2BIG;
+ len = strnlen(src, count - 1);
+ memcpy(dst, src, len);
+ dst[len] = '\0';
+ return src[len] ? -E2BIG : len;
+}
+
+void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+ uint64_t *xs = s;
+
+ while (count--)
+ *xs++ = v;
+ return s;
+}
+
char *skip_spaces(const char *str)
{
while (isspace(*str))
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 1e66d2cbb096..4568e8f81dac 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -8,12 +8,8 @@
#include "uv.h"
/* will be used in arch/s390/kernel/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
-#endif
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
-#endif
struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
@@ -26,8 +22,8 @@ void uv_query_info(void)
if (!test_facility(158))
return;
- /* rc==0x100 means that there is additional data we do not process */
- if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
+ /* Ignore that there might be more data we do not process */
+ if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != UVC_RC_MORE_DATA)
return;
if (IS_ENABLED(CONFIG_KVM)) {
@@ -50,17 +46,15 @@ void uv_query_info(void)
uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver;
uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf;
uv_info.supp_secret_types = uvcb.supp_secret_types;
- uv_info.max_secrets = uvcb.max_secrets;
+ uv_info.max_assoc_secrets = uvcb.max_assoc_secrets;
+ uv_info.max_retr_secrets = uvcb.max_retr_secrets;
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
-#endif
}
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
@@ -92,4 +86,3 @@ void sanitize_prot_virt_host(void)
{
prot_virt_host = is_prot_virt_host_capable();
}
-#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 0f3070856f8d..da4a4a8d48e0 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -2,21 +2,8 @@
#ifndef BOOT_UV_H
#define BOOT_UV_H
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
-#else
-static inline unsigned long adjust_to_uv_max(unsigned long limit)
-{
- return limit;
-}
-static inline void sanitize_prot_virt_host(void) {}
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
void uv_query_info(void);
-#else
-static inline void uv_query_info(void) {}
-#endif
#endif /* BOOT_UV_H */
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 09b10bb6e4d0..1d073acd05a7 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#define boot_fmt(fmt) "vmem: " fmt
+#include <linux/cpufeature.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
@@ -9,10 +11,12 @@
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
+#include <asm/machine.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
struct ctlreg __bootdata_preserved(s390_invalid_asce);
#ifdef CONFIG_PROC_FS
@@ -26,14 +30,47 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
enum populate_mode {
POPULATE_NONE,
POPULATE_DIRECT,
+ POPULATE_LOWCORE,
POPULATE_ABS_LOWCORE,
+ POPULATE_IDENTITY,
+ POPULATE_KERNEL,
#ifdef CONFIG_KASAN
+ /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
POPULATE_KASAN_MAP_SHADOW,
POPULATE_KASAN_ZERO_SHADOW,
POPULATE_KASAN_SHALLOW
#endif
};
+#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
+static inline const char *get_populate_mode_name(enum populate_mode t)
+{
+ switch (t) {
+ POPULATE_MODE_NAME(NONE);
+ POPULATE_MODE_NAME(DIRECT);
+ POPULATE_MODE_NAME(LOWCORE);
+ POPULATE_MODE_NAME(ABS_LOWCORE);
+ POPULATE_MODE_NAME(IDENTITY);
+ POPULATE_MODE_NAME(KERNEL);
+#ifdef CONFIG_KASAN
+ POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
+ POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
+ POPULATE_MODE_NAME(KASAN_SHALLOW);
+#endif
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static bool is_kasan_populate_mode(enum populate_mode mode)
+{
+#ifdef CONFIG_KASAN
+ return mode >= POPULATE_KASAN_MAP_SHADOW;
+#else
+ return false;
+#endif
+}
+
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);
#ifdef CONFIG_KASAN
@@ -49,24 +86,24 @@ static pte_t pte_z;
static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- pgtable_populate(start, end, mode);
+ unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
+ unsigned long sha_end = PAGE_ALIGN(__sha(end));
+
+ boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
+ start, end, sha_start, sha_end);
+ pgtable_populate(sha_start, sha_end, mode);
}
-static void kasan_populate_shadow(void)
+static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
unsigned long memgap_start = 0;
- unsigned long untracked_end;
unsigned long start, end;
int i;
pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
- if (!machine.has_nx)
- pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
@@ -76,54 +113,26 @@ static void kasan_populate_shadow(void)
__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat(kasan_early_shadow_pte, 1);
- /*
- * Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * |1:1 ident mapping| /|1/8 of ident map|
- * | | / | |
- * +-end of ident map+ / +----------------+
- * | ... gap ... | / | kasan |
- * | | / | zero page |
- * +- vmalloc area -+ / | mapping |
- * | vmalloc_size | / | (untracked) |
- * +- modules vaddr -+ / +----------------+
- * | 2Gb |/ | unmapped | allocated per module
- * +- shadow start -+ +----------------+
- * | 1/8 addr space | | zero pg mapping| (untracked)
- * +- shadow end ----+---------+- shadow end ---+
- *
- * Current memory layout (KASAN_VMALLOC):
- * +- 0 -------------+ +- shadow start -+
- * |1:1 ident mapping| /|1/8 of ident map|
- * | | / | |
- * +-end of ident map+ / +----------------+
- * | ... gap ... | / | kasan zero page| (untracked)
- * | | / | mapping |
- * +- vmalloc area -+ / +----------------+
- * | vmalloc_size | / |shallow populate|
- * +- modules vaddr -+ / +----------------+
- * | 2Gb |/ |shallow populate|
- * +- shadow start -+ +----------------+
- * | 1/8 addr space | | zero pg mapping| (untracked)
- * +- shadow end ----+---------+- shadow end ---+
- */
-
for_each_physmem_usable_range(i, &start, &end) {
- kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
- if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260)
- kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate((unsigned long)__identity_va(start),
+ (unsigned long)__identity_va(end),
+ POPULATE_KASAN_MAP_SHADOW);
+ if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
+ kasan_populate((unsigned long)__identity_va(memgap_start),
+ (unsigned long)__identity_va(start),
+ POPULATE_KASAN_ZERO_SHADOW);
+ }
memgap_start = end;
}
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- untracked_end = VMALLOC_START;
- /* shallowly populate kasan shadow for vmalloc and modules */
- kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
- } else {
- untracked_end = MODULES_VADDR;
- }
+ kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
+ /* shallowly populate kasan shadow for vmalloc and modules */
+ kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
/* populate kasan shadow for untracked memory */
- kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
- kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
+ POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
@@ -180,7 +189,9 @@ static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
}
#else
-static inline void kasan_populate_shadow(void) {}
+static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
+{
+}
static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
unsigned long end, enum populate_mode mode)
@@ -226,7 +237,7 @@ static void *boot_crst_alloc(unsigned long val)
unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table;
- table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+ table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
crst_table_init(table, val);
__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
@@ -242,7 +253,7 @@ static pte_t *boot_pte_alloc(void)
* during POPULATE_KASAN_MAP_SHADOW when EDAT is off
*/
if (!pte_leftover) {
- pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
+ pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
__arch_set_page_dat(pte, 1);
} else {
@@ -254,36 +265,82 @@ static pte_t *boot_pte_alloc(void)
return pte;
}
-static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
+static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
+ enum populate_mode mode)
{
switch (mode) {
case POPULATE_NONE:
- return -1;
+ return INVALID_PHYS_ADDR;
case POPULATE_DIRECT:
return addr;
+ case POPULATE_LOWCORE:
+ return __lowcore_pa(addr);
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
+ case POPULATE_KERNEL:
+ return __kernel_pa(addr);
+ case POPULATE_IDENTITY:
+ return __identity_pa(addr);
+#ifdef CONFIG_KASAN
+ case POPULATE_KASAN_MAP_SHADOW:
+ /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */
+ addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
+ if (addr) {
+ memset((void *)addr, 0, size);
+ return addr;
+ }
+ return INVALID_PHYS_ADDR;
+#endif
+ default:
+ return INVALID_PHYS_ADDR;
+ }
+}
+
+static bool large_page_mapping_allowed(enum populate_mode mode)
+{
+ switch (mode) {
+ case POPULATE_DIRECT:
+ case POPULATE_IDENTITY:
+ case POPULATE_KERNEL:
#ifdef CONFIG_KASAN
case POPULATE_KASAN_MAP_SHADOW:
- addr = physmem_alloc_top_down(RR_VMEM, size, size);
- memset((void *)addr, 0, size);
- return addr;
#endif
+ return true;
default:
- return -1;
+ return false;
}
}
-static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- return machine.has_edat2 &&
- IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+ unsigned long pa, size = end - addr;
+
+ if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PUD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ return pa;
}
-static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- return machine.has_edat1 &&
- IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+ unsigned long pa, size = end - addr;
+
+ if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
+ !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ pa = resolve_pa_may_alloc(addr, size, mode);
+ if (!IS_ALIGNED(pa, PMD_SIZE))
+ return INVALID_PHYS_ADDR;
+
+ return pa;
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -297,22 +354,20 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (pte_none(*pte)) {
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
- entry = __pte(_pa(addr, PAGE_SIZE, mode));
+ entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
entry = set_pte_bit(entry, PAGE_KERNEL);
- if (!machine.has_nx)
- entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_4K, pages);
}
static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pmd_t *pmd, entry;
pte_t *pte;
@@ -322,11 +377,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (pmd_none(*pmd)) {
if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
continue;
- if (can_large_pmd(pmd, addr, next)) {
- entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
+ pa = try_get_large_pmd_pa(pmd, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pmd(pa);
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
- if (!machine.has_nx)
- entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
@@ -338,14 +392,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
}
pgtable_pte_populate(pmd, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_1M, pages);
}
static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
- unsigned long next, pages = 0;
+ unsigned long pa, next, pages = 0;
pud_t *pud, entry;
pmd_t *pmd;
@@ -355,11 +409,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (pud_none(*pud)) {
if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
continue;
- if (can_large_pud(pud, addr, next)) {
- entry = __pud(_pa(addr, _REGION3_SIZE, mode));
+ pa = try_get_large_pud_pa(pud, addr, next, mode);
+ if (pa != INVALID_PHYS_ADDR) {
+ entry = __pud(pa);
entry = set_pud_bit(entry, REGION3_KERNEL);
- if (!machine.has_nx)
- entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;
@@ -371,7 +424,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
}
pgtable_pmd_populate(pud, addr, next, mode);
}
- if (mode == POPULATE_DIRECT)
+ if (mode == POPULATE_IDENTITY)
update_page_count(PG_DIRECT_MAP_2G, pages);
}
@@ -401,6 +454,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
pgd_t *pgd;
p4d_t *p4d;
+ if (!is_kasan_populate_mode(mode)) {
+ boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
+ get_populate_mode_name(mode), addr, end,
+ resolve_pa_may_alloc(addr, 0, mode),
+ resolve_pa_may_alloc(end - 1, 0, mode) + 1);
+ }
+
pgd = pgd_offset(&init_mm, addr);
for (; addr < end; addr = next, pgd++) {
next = pgd_addr_end(addr, end);
@@ -418,11 +478,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
}
}
-void setup_vmem(unsigned long asce_limit)
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
+ unsigned long lowcore_address = 0;
unsigned long start, end;
unsigned long asce_type;
unsigned long asce_bits;
+ pgd_t *init_mm_pgd;
int i;
/*
@@ -433,6 +495,15 @@ void setup_vmem(unsigned long asce_limit)
for_each_physmem_online_range(i, &start, &end)
__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+ /*
+ * init_mm->pgd contains virtual address of swapper_pg_dir.
+ * It is unusable at this stage since DAT is yet off. Swap
+ * it for physical address of swapper_pg_dir and restore
+ * the virtual address after all page tables are created.
+ */
+ init_mm_pgd = init_mm.pgd;
+ init_mm.pgd = (pgd_t *)swapper_pg_dir;
+
if (asce_limit == _REGION1_SIZE) {
asce_type = _REGION2_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -447,28 +518,49 @@ void setup_vmem(unsigned long asce_limit)
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ if (machine_has_relocated_lowcore())
+ lowcore_address = LOWCORE_ALT_ADDRESS;
+
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
* To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards.
*/
- pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
- for_each_physmem_usable_range(i, &start, &end)
- pgtable_populate(start, end, POPULATE_DIRECT);
+ pgtable_populate(lowcore_address,
+ lowcore_address + sizeof(struct lowcore),
+ POPULATE_LOWCORE);
+ for_each_physmem_usable_range(i, &start, &end) {
+ pgtable_populate((unsigned long)__identity_va(start),
+ (unsigned long)__identity_va(end),
+ POPULATE_IDENTITY);
+ }
+
+ /*
+ * [kernel_start..kernel_start + TEXT_OFFSET] region is never
+ * accessed as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
+ */
+ pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
+ pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);
pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
POPULATE_NONE);
- memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+ memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));
- kasan_populate_shadow();
+ kasan_populate_shadow(kernel_start, kernel_end);
- S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
- S390_lowcore.user_asce = s390_invalid_asce;
+ get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
+ get_lowcore()->user_asce = s390_invalid_asce;
- local_ctl_load(1, &S390_lowcore.kernel_asce);
- local_ctl_load(7, &S390_lowcore.user_asce);
- local_ctl_load(13, &S390_lowcore.kernel_asce);
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
+ local_ctl_load(13, &get_lowcore()->kernel_asce);
- init_mm.context.asce = S390_lowcore.kernel_asce.val;
+ init_mm.context.asce = get_lowcore()->kernel_asce.val;
+ init_mm.pgd = init_mm_pgd;
}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 3d7ea585ab99..50988022f9ea 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -40,6 +40,7 @@ SECTIONS
*(.rodata.*)
_erodata = . ;
}
+ EXCEPTION_TABLE(16)
.got : {
*(.got)
}
@@ -99,8 +100,22 @@ SECTIONS
_decompressor_end = .;
+ . = ALIGN(4);
+ .vmlinux.relocs : {
+ __vmlinux_relocs_64_start = .;
+ *(.vmlinux.relocs_64)
+ __vmlinux_relocs_64_end = .;
+ }
+
#ifdef CONFIG_KERNEL_UNCOMPRESSED
- . = 0x100000;
+ . = ALIGN(PAGE_SIZE);
+ . += AMODE31_SIZE; /* .amode31 section */
+
+ /*
+ * Make sure the location counter is not less than TEXT_OFFSET.
+ * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead.
+ */
+ . = MAX(TEXT_OFFSET, ALIGN(1 << 20));
#else
. = ALIGN(8);
#endif
@@ -110,24 +125,6 @@ SECTIONS
_compressed_end = .;
}
-#ifndef CONFIG_PIE_BUILD
- /*
- * When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
- * uncompressed vmlinux.bin is positioned in the bzImage decompressor
- * image at the default kernel LMA of 0x100000, enabling it to be
- * executed in-place. However, the size of .vmlinux.relocs could be
- * large enough to cause an overlap with the uncompressed kernel at the
- * address 0x100000. To address this issue, .vmlinux.relocs is
- * positioned after the .rodata.compressed.
- */
- . = ALIGN(4);
- .vmlinux.relocs : {
- __vmlinux_relocs_64_start = .;
- *(.vmlinux.relocs_64)
- __vmlinux_relocs_64_end = .;
- }
-#endif
-
#define SB_TRAILER_SIZE 32
/* Trailer needed for Secure Boot */
. += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
@@ -169,7 +166,6 @@ SECTIONS
/DISCARD/ : {
COMMON_DISCARDS
*(.eh_frame)
- *(__ex_table)
*(*__ksymtab*)
*(___kcrctab*)
}