diff options
Diffstat (limited to 'arch/s390/boot')
-rw-r--r-- | arch/s390/boot/.gitignore | 1 | ||||
-rw-r--r-- | arch/s390/boot/Makefile | 45 | ||||
-rw-r--r-- | arch/s390/boot/als.c | 49 | ||||
-rw-r--r-- | arch/s390/boot/alternative.c | 138 | ||||
-rw-r--r-- | arch/s390/boot/boot.h | 53 | ||||
-rw-r--r-- | arch/s390/boot/decompressor.c | 25 | ||||
-rw-r--r-- | arch/s390/boot/decompressor.h | 8 | ||||
-rw-r--r-- | arch/s390/boot/head.S | 29 | ||||
-rwxr-xr-x | arch/s390/boot/install.sh | 2 | ||||
-rw-r--r-- | arch/s390/boot/ipl_parm.c | 61 | ||||
-rw-r--r-- | arch/s390/boot/ipl_report.c | 5 | ||||
-rw-r--r-- | arch/s390/boot/kaslr.c | 6 | ||||
-rw-r--r-- | arch/s390/boot/kmsan.c | 6 | ||||
-rw-r--r-- | arch/s390/boot/pgm_check.c | 92 | ||||
-rw-r--r-- | arch/s390/boot/pgm_check_info.c | 179 | ||||
-rw-r--r-- | arch/s390/boot/physmem_info.c | 212 | ||||
-rw-r--r-- | arch/s390/boot/printk.c | 299 | ||||
-rw-r--r-- | arch/s390/boot/startup.c | 512 | ||||
-rw-r--r-- | arch/s390/boot/string.c | 28 | ||||
-rw-r--r-- | arch/s390/boot/uv.c | 15 | ||||
-rw-r--r-- | arch/s390/boot/uv.h | 13 | ||||
-rw-r--r-- | arch/s390/boot/vmem.c | 280 | ||||
-rw-r--r-- | arch/s390/boot/vmlinux.lds.S | 36 |
23 files changed, 1377 insertions, 717 deletions
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore index f5ef099e2fd3..af2a6a7bc028 100644 --- a/arch/s390/boot/.gitignore +++ b/arch/s390/boot/.gitignore @@ -5,4 +5,5 @@ relocs.S section_cmp.* vmlinux vmlinux.lds +vmlinux.map vmlinux.syms diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 294f08a8811a..bee49626be4b 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,55 +3,42 @@ # Makefile for the linux s390-specific parts of the memory manager. # +# Tooling runtimes are unavailable and cannot be linked for early boot code KCOV_INSTRUMENT := n GCOV_PROFILE := n UBSAN_SANITIZE := n KASAN_SANITIZE := n KCSAN_SANITIZE := n - -KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) -KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) +KMSAN_SANITIZE := n # -# Use minimum architecture for als.c to be able to print an error +# Use minimum architecture level so it is possible to print an error # message if the kernel is started on a machine which is too old # -ifndef CONFIG_CC_IS_CLANG -CC_FLAGS_MARCH_MINIMUM := -march=z900 -else CC_FLAGS_MARCH_MINIMUM := -march=z10 -endif - -ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM)) -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM) -AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) -AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM) -CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM) -endif + +KBUILD_AFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_AFLAGS_DECOMPRESSOR)) +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_MARCH),$(KBUILD_CFLAGS_DECOMPRESSOR)) +KBUILD_AFLAGS += $(CC_FLAGS_MARCH_MINIMUM) +KBUILD_CFLAGS += $(CC_FLAGS_MARCH_MINIMUM) CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o -obj-y += $(if $(CONFIG_PIE_BUILD),machine_kexec_reloc.o,relocs.o) -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o +obj-y += version.o pgm_check.o ctype.o ipl_data.o relocs.o alternative.o +obj-y += uv.o printk.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o +obj-$(CONFIG_KMSAN) += kmsan.o obj-all := $(obj-y) piggy.o syms.o targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all) -ifndef CONFIG_PIE_BUILD targets += relocs.S -endif OBJECTS := $(addprefix $(obj)/,$(obj-y)) OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all)) @@ -110,13 +97,9 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -ifndef CONFIG_PIE_BUILD -CMD_RELOCS=arch/s390/tools/relocs -quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(CMD_RELOCS) $< > $@ -$(obj)/relocs.S: vmlinux FORCE - $(call if_changed,relocs) -endif +# relocs.S is created by the vmlinux postlink step. +$(obj)/relocs.S: vmlinux + @true suffix-$(CONFIG_KERNEL_GZIP) := .gz suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 47c48fbfb563..79afb5fa7f1f 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -9,42 +9,8 @@ #include <asm/sclp.h> #include "boot.h" -/* - * The code within this file will be called very early. It may _not_ - * access anything within the bss section, since that is not cleared - * yet and may contain data (e.g. initrd) that must be saved by other - * code. - * For temporary objects the stack (16k) should be used. - */ - static unsigned long als[] = { FACILITIES_ALS }; -static void u16_to_hex(char *str, u16 val) -{ - int i, num; - - for (i = 1; i <= 4; i++) { - num = (val >> (16 - 4 * i)) & 0xf; - if (num >= 10) - num += 7; - *str++ = '0' + num; - } - *str = '\0'; -} - -static void print_machine_type(void) -{ - static char mach_str[80] = "Detected machine-type number: "; - char type_str[5]; - struct cpuid id; - - get_cpu_id(&id); - u16_to_hex(type_str, id.machine); - strcat(mach_str, type_str); - strcat(mach_str, "\n"); - sclp_early_printk(mach_str); -} - static void u16_to_decimal(char *str, u16 val) { int div = 1; @@ -80,8 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. */ if (strlen(als_str) > 70) { - strcat(als_str, "\n"); - sclp_early_printk(als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -89,16 +54,18 @@ void print_missing_facilities(void) first = 0; } } - strcat(als_str, "\n"); - sclp_early_printk(als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) { - sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); - print_machine_type(); + struct cpuid id; + + get_cpu_id(&id); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - sclp_early_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See Principles of Operations for facility bits\n"); disabled_wait(); } diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c new file mode 100644 index 000000000000..19ea7934b918 --- /dev/null +++ b/arch/s390/boot/alternative.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "alt: " fmt +#include "boot.h" + +#define a_debug boot_debug + +#include "../kernel/alternative.c" + +static void alt_debug_all(int type) +{ + int i; + + switch (type) { + case ALT_TYPE_FACILITY: + for (i = 0; i < ARRAY_SIZE(alt_debug.facilities); i++) + alt_debug.facilities[i] = -1UL; + break; + case ALT_TYPE_FEATURE: + for (i = 0; i < ARRAY_SIZE(alt_debug.mfeatures); i++) + alt_debug.mfeatures[i] = -1UL; + break; + case ALT_TYPE_SPEC: + alt_debug.spec = 1; + break; + } +} + +static void alt_debug_modify(int type, unsigned int nr, bool clear) +{ + switch (type) { + case ALT_TYPE_FACILITY: + if (clear) + __clear_facility(nr, alt_debug.facilities); + else + __set_facility(nr, alt_debug.facilities); + break; + case ALT_TYPE_FEATURE: + if (clear) + __clear_machine_feature(nr, alt_debug.mfeatures); + else + __set_machine_feature(nr, alt_debug.mfeatures); + break; + } +} + +static char *alt_debug_parse(int type, char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + int i; + + if (*str == ':') { + str++; + } else { + alt_debug_all(type); + return str; + } + clear = false; + if (*str == '!') { + alt_debug_all(type); + clear = true; + str++; + } + while (*str) { + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + alt_debug_modify(type, val, clear); + val++; + } + } else { + alt_debug_modify(type, val, clear); + } + if (*str != ',') + break; + str++; + } + return str; +} + +/* + * Use debug-alternative command line parameter for debugging: + * "debug-alternative" + * -> print debug message for every single alternative + * + * "debug-alternative=0;2" + * -> print debug message for all alternatives with type 0 and 2 + * + * "debug-alternative=0:0-7" + * -> print debug message for all alternatives with type 0 and with + * facility numbers within the range of 0-7 + * (if type 0 is ALT_TYPE_FACILITY) + * + * "debug-alternative=0:!8;1" + * -> print debug message for all alternatives with type 0, for all + * facility number, except facility 8, and in addition print all + * alternatives with type 1 + */ +void alt_debug_setup(char *str) +{ + unsigned long type; + char *endp; + int i; + + if (!str) { + alt_debug_all(ALT_TYPE_FACILITY); + alt_debug_all(ALT_TYPE_FEATURE); + alt_debug_all(ALT_TYPE_SPEC); + return; + } + while (*str) { + type = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + switch (type) { + case ALT_TYPE_FACILITY: + case ALT_TYPE_FEATURE: + str = alt_debug_parse(type, str); + break; + case ALT_TYPE_SPEC: + alt_debug_all(ALT_TYPE_SPEC); + break; + } + if (*str != ';') + break; + str++; + } +} diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 567d60f78bbc..e045cae6e80a 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,16 +8,10 @@ #ifndef __ASSEMBLY__ +#include <linux/printk.h> #include <asm/physmem_info.h> -struct machine_info { - unsigned char has_edat1 : 1; - unsigned char has_edat2 : 1; - unsigned char has_nx : 1; -}; - struct vmlinux_info { - unsigned long default_lma; unsigned long entry; unsigned long image_size; /* does not include .bss */ unsigned long bss_size; /* uncompressed image .bss size */ @@ -25,18 +19,14 @@ struct vmlinux_info { unsigned long bootdata_size; unsigned long bootdata_preserved_off; unsigned long bootdata_preserved_size; -#ifdef CONFIG_PIE_BUILD - unsigned long dynsym_start; - unsigned long rela_dyn_start; - unsigned long rela_dyn_end; -#else unsigned long got_start; unsigned long got_end; -#endif unsigned long amode31_size; unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; + unsigned long alt_instructions; + unsigned long alt_instructions_end; #ifdef CONFIG_KASAN unsigned long kasan_early_shadow_page_off; unsigned long kasan_early_shadow_pte_off; @@ -53,13 +43,16 @@ void physmem_set_usable_limit(unsigned long limit); void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align); +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); @@ -71,15 +64,33 @@ void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); -void print_pgm_check_info(void); +void alt_debug_setup(char *str); +void do_pgm_check(struct pt_regs *regs); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); -void setup_vmem(unsigned long asce_limit); -void __printf(1, 2) decompressor_printk(const char *fmt, ...); +void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); +int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); +int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); + +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; /* Symbols defined by linker scripts */ extern const char kernel_version[]; @@ -95,9 +106,15 @@ extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; + #define vmlinux _vmlinux_info +#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore)) #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) +#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset)) +#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys) +#define __identity_va(x) ((void *)((unsigned long)(x) + __identity_base)) +#define __identity_pa(x) ((unsigned long)(x) - __identity_base) static inline bool intersects(unsigned long addr0, unsigned long size0, unsigned long addr1, unsigned long size1) diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index d762733a0753..03500b9d9fb9 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/boot_data.h> #include <asm/page.h> #include "decompressor.h" #include "boot.h" @@ -63,24 +64,22 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unzstd.c" #endif -#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE) +static void decompress_error(char *m) +{ + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} unsigned long mem_safe_offset(void) { - /* - * due to 4MB HEAD_SIZE for bzip2 - * 'decompress_offset + vmlinux.image_size' could be larger than - * kernel at final position + its .bss, so take the larger of two - */ - return max(decompress_offset + vmlinux.image_size, - vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size); + return ALIGN(free_mem_end_ptr, PAGE_SIZE); } -void *decompress_kernel(void) +void deploy_kernel(void *output) { - void *output = (void *)decompress_offset; - __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, vmlinux.image_size, NULL, error); - return output; + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h index 92b81d2ea35d..4f966f06bd65 100644 --- a/arch/s390/boot/decompressor.h +++ b/arch/s390/boot/decompressor.h @@ -2,11 +2,9 @@ #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H #define BOOT_COMPRESSED_DECOMPRESSOR_H -#ifdef CONFIG_KERNEL_UNCOMPRESSED -static inline void *decompress_kernel(void) { return NULL; } -#else -void *decompress_kernel(void); -#endif +#ifndef CONFIG_KERNEL_UNCOMPRESSED unsigned long mem_safe_offset(void); +void deploy_kernel(void *output); +#endif #endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 637c29c3f6e3..0b511d5c030b 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -254,8 +254,9 @@ SYM_CODE_START_LOCAL(startup_normal) xc 0xf00(256),0xf00 larl %r13,.Lctl lctlg %c0,%c15,0(%r13) # load control registers - stcke __LC_BOOT_CLOCK - mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1 + larl %r13,tod_clock_base + stcke 0(%r13) + mvc __LC_LAST_UPDATE_CLOCK(8),1(%r13) larl %r13,6f spt 0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),0(%r13) @@ -292,18 +293,12 @@ SYM_CODE_END(startup_normal) #include "head_kdump.S" -# -# This program check is active immediately after kernel start -# and until early_pgm_check_handler is set in kernel/early.c -# It simply saves general/control registers and psw in -# the save area and does disabled wait with a faulty address. -# SYM_CODE_START_LOCAL(startup_pgm_check_handler) - stmg %r8,%r15,__LC_SAVE_AREA_SYNC + stmg %r8,%r15,__LC_SAVE_AREA la %r8,4095 stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) - mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW ni __LC_RETURN_PSW,0xfc # remove IO and EX bits @@ -311,8 +306,18 @@ SYM_CODE_START_LOCAL(startup_pgm_check_handler) oi __LC_RETURN_PSW+1,0x2 # set wait state bit larl %r9,.Lold_psw_disabled_wait stg %r9,__LC_PGM_NEW_PSW+8 - larl %r15,_dump_info_stack_end-STACK_FRAME_OVERHEAD - brasl %r14,print_pgm_check_info + larl %r15,_dump_info_stack_end-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r2,STACK_FRAME_OVERHEAD(%r15) + mvc __PT_PSW(16,%r2),__LC_PSW_SAVE_AREA-4095(%r8) + mvc __PT_R0(128,%r2),__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __PT_LAST_BREAK(8,%r2),__LC_PGM_LAST_BREAK + mvc __PT_INT_CODE(4,%r2),__LC_PGM_INT_CODE + brasl %r14,do_pgm_check + larl %r9,startup_pgm_check_handler + stg %r9,__LC_PGM_NEW_PSW+8 + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW .Lold_psw_disabled_wait: la %r8,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index a13dd2f2aa1c..fa41486258ee 100755 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -15,6 +15,8 @@ # $3 - kernel map file # $4 - default install path (blank if root directory) +set -e + echo "Warning: '${INSTALLKERNEL}' command not available - additional " \ "bootloader config required" >&2 if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index b24de9aabf7d..f584d7da29cb 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,7 +3,9 @@ #include <linux/init.h> #include <linux/ctype.h> #include <linux/pgtable.h> +#include <asm/abs_lowcore.h> #include <asm/page-states.h> +#include <asm/machine.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> @@ -33,29 +35,14 @@ int vmalloc_size_set; static inline int __diag308(unsigned long subcode, void *addr) { - unsigned long reg1, reg2; - union register_pair r1; - psw_t old; - - r1.even = (unsigned long) addr; - r1.odd = 0; - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + union register_pair r1 = { .even = (unsigned long)addr, .odd = 0 }; + + asm_inline volatile( " diag %[r1],%[subcode],0x308\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [r1] "+&d" (r1.pair), - [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - "+Q" (S390_lowcore.program_new_psw), - "=Q" (old) - : [subcode] "d" (subcode), - [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw) + "0:\n" + EX_TABLE(0b, 0b) + : [r1] "+d" (r1.pair) + : [subcode] "d" (subcode) : "cc", "memory"); return r1.odd; } @@ -192,7 +179,7 @@ void setup_boot_command_line(void) if (has_ebcdic_char(parmarea.command_line)) EBCASC(parmarea.command_line, COMMAND_LINE_SIZE); /* copy arch command line */ - strcpy(early_command_line, strim(parmarea.command_line)); + strscpy(early_command_line, strim(parmarea.command_line)); /* append IPL PARM data to the boot command line */ if (!is_prot_virt_guest() && ipl_block_valid) @@ -214,7 +201,7 @@ static void check_cleared_facilities(void) for (i = 0; i < ARRAY_SIZE(als); i++) { if ((stfle_fac_list[i] & als[i]) != als[i]) { - sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } @@ -266,7 +253,8 @@ void parse_boot_command_line(void) int rc; __kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE); - args = strcpy(command_line_buf, early_command_line); + strscpy(command_line_buf, early_command_line); + args = command_line_buf; while (*args) { args = next_arg(args, ¶m, &val); @@ -294,6 +282,9 @@ void parse_boot_command_line(void) if (!strcmp(param, "facilities") && val) modify_fac_list(val); + if (!strcmp(param, "debug-alternative")) + alt_debug_setup(val); + if (!strcmp(param, "nokaslr")) __kaslr_enabled = 0; @@ -310,5 +301,25 @@ void parse_boot_command_line(void) prot_virt_host = 1; } #endif + if (!strcmp(param, "relocate_lowcore") && test_facility(193)) + set_machine_feature(MFEATURE_LOWCORE); + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) { + bootdebug = true; + if (val) + strscpy(bootdebug_filter, val); + } + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } } } diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 1803035e68d2..f73cd757a5f7 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data @@ -106,7 +105,7 @@ int read_ipl_report(void) * the IPL parameter list, then align the address to a double * word boundary. */ - tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr; + tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr; pl_hdr = (struct ipl_pl_hdr *) tmp; tmp = (tmp + pl_hdr->len + 7) & -8UL; rl_hdr = (struct ipl_rl_hdr *) tmp; @@ -155,7 +154,7 @@ void save_ipl_cert_comp_list(void) return; size = get_cert_comp_list_size(); - early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; copy_components_bootdata(); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 90602101e2ae..941f4c9e27cc 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -32,7 +32,7 @@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - sclp_early_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -43,7 +43,7 @@ static int check_prng(void) return PRNG_MODE_TDES; } -static int get_random(unsigned long limit, unsigned long *value) +int get_random(unsigned long limit, unsigned long *value) { struct prng_parm prng = { /* initial parameter block for tdes mode, copied from libica */ @@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a * cannot have chains. * * On the other hand, "dynamic" or "repetitive" allocations are done via - * physmem_alloc_top_down(). These allocations are tightly packed together + * physmem_alloc_or_die(). These allocations are tightly packed together * top down from the end of online memory. physmem_alloc_pos represents * current position where those allocations start. * diff --git a/arch/s390/boot/kmsan.c b/arch/s390/boot/kmsan.c new file mode 100644 index 000000000000..e7b3ac48143e --- /dev/null +++ b/arch/s390/boot/kmsan.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kmsan-checks.h> + +void kmsan_unpoison_memory(const void *address, size_t size) +{ +} diff --git a/arch/s390/boot/pgm_check.c b/arch/s390/boot/pgm_check.c new file mode 100644 index 000000000000..fa621fa5bc02 --- /dev/null +++ b/arch/s390/boot/pgm_check.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +void print_stacktrace(unsigned long sp) +{ + struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, + (unsigned long)_stack_end }; + bool first = true; + + boot_emerg("Call Trace:\n"); + while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { + struct stack_frame *sf = (struct stack_frame *)sp; + + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + if (sf->back_chain <= sp) + break; + sp = sf->back_chain; + first = false; + } +} + +extern struct exception_table_entry __start___ex_table[]; +extern struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long extable_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +static bool ex_handler(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; + + for (ex = __start___ex_table; ex < __stop___ex_table; ex++) { + if (extable_insn(ex) != regs->psw.addr) + continue; + if (ex->type != EX_TYPE_FIXUP) + return false; + regs->psw.addr = extable_fixup(ex); + return true; + } + return false; +} + +void do_pgm_check(struct pt_regs *regs) +{ + struct psw_bits *psw = &psw_bits(regs->psw); + unsigned long *gpregs = regs->gprs; + + if (ex_handler(regs)) + return; + if (bootdebug) + boot_rb_dump(); + boot_emerg("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", + regs->int_code & 0xffff, regs->int_code >> 17); + if (kaslr_enabled()) { + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); + } + boot_emerg("PSW : %016lx %016lx (%pS)\n", + regs->psw.mask, regs->psw.addr, (void *)regs->psw.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + print_stacktrace(gpregs[15]); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", regs->last_break, (void *)regs->last_break); + /* Convert to disabled wait PSW */ + psw->io = 0; + psw->ext = 0; + psw->wait = 1; +} diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c deleted file mode 100644 index 97244cd7a206..000000000000 --- a/arch/s390/boot/pgm_check_info.c +++ /dev/null @@ -1,179 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/stdarg.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <asm/stacktrace.h> -#include <asm/boot_data.h> -#include <asm/lowcore.h> -#include <asm/setup.h> -#include <asm/sclp.h> -#include <asm/uv.h> -#include "boot.h" - -const char hex_asc[] = "0123456789abcdef"; - -static char *as_hex(char *dst, unsigned long val, int pad) -{ - char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); - - for (*p-- = 0; p >= dst; val >>= 4) - *p-- = hex_asc[val & 0x0f]; - return end; -} - -static char *symstart(char *p) -{ - while (*p) - p--; - return p + 1; -} - -static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) -{ - /* symbol entries are in a form "10000 c4 startup\0" */ - char *a = _decompressor_syms_start; - char *b = _decompressor_syms_end; - unsigned long start; - unsigned long size; - char *pivot; - char *endp; - - while (a < b) { - pivot = symstart(a + (b - a) / 2); - start = simple_strtoull(pivot, &endp, 16); - size = simple_strtoull(endp + 1, &endp, 16); - if (ip < start) { - b = pivot; - continue; - } - if (ip > start + size) { - a = pivot + strlen(pivot) + 1; - continue; - } - *off = ip - start; - *len = size; - return endp + 1; - } - return NULL; -} - -static noinline char *strsym(void *ip) -{ - static char buf[64]; - unsigned short off; - unsigned short len; - char *p; - - p = findsym((unsigned long)ip, &off, &len); - if (p) { - strncpy(buf, p, sizeof(buf)); - /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ - p = buf + strnlen(buf, sizeof(buf) - 15); - strcpy(p, "+0x"); - p = as_hex(p + 3, off, 0); - strcpy(p, "/0x"); - as_hex(p + 3, len, 0); - } else { - as_hex(buf, (unsigned long)ip, 16); - } - return buf; -} - -void decompressor_printk(const char *fmt, ...) -{ - char buf[1024] = { 0 }; - char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - unsigned long pad; - char *p = buf; - va_list args; - - va_start(args, fmt); - for (; p < end && *fmt; fmt++) { - if (*fmt != '%') { - *p++ = *fmt; - continue; - } - pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; - switch (*fmt) { - case 's': - p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); - break; - case 'p': - if (*++fmt != 'S') - goto out; - p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); - break; - case 'l': - if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned long), pad); - break; - case 'x': - if (end - p <= max(sizeof(int) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned int), pad); - break; - default: - goto out; - } - } -out: - va_end(args); - sclp_early_printk(buf); -} - -void print_stacktrace(unsigned long sp) -{ - struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, - (unsigned long)_stack_end }; - bool first = true; - - decompressor_printk("Call Trace:\n"); - while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { - struct stack_frame *sf = (struct stack_frame *)sp; - - decompressor_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : - " sp:%016lx [<%016lx>] %pS\n", - sp, sf->gprs[8], (void *)sf->gprs[8]); - if (sf->back_chain <= sp) - break; - sp = sf->back_chain; - first = false; - } -} - -void print_pgm_check_info(void) -{ - unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area; - struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area); - - decompressor_printk("Linux version %s\n", kernel_version); - if (!is_prot_virt_guest() && early_command_line[0]) - decompressor_printk("Kernel command line: %s\n", early_command_line); - decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n", - S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1); - if (kaslr_enabled()) - decompressor_printk("Kernel random base: %lx\n", __kaslr_offset); - decompressor_printk("PSW : %016lx %016lx (%pS)\n", - S390_lowcore.psw_save_area.mask, - S390_lowcore.psw_save_area.addr, - (void *)S390_lowcore.psw_save_area.addr); - decompressor_printk( - " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", - psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, - psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, - psw->eaba); - decompressor_printk("GPRS: %016lx %016lx %016lx %016lx\n", - gpregs[0], gpregs[1], gpregs[2], gpregs[3]); - decompressor_printk(" %016lx %016lx %016lx %016lx\n", - gpregs[4], gpregs[5], gpregs[6], gpregs[7]); - decompressor_printk(" %016lx %016lx %016lx %016lx\n", - gpregs[8], gpregs[9], gpregs[10], gpregs[11]); - decompressor_printk(" %016lx %016lx %016lx %016lx\n", - gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(S390_lowcore.gpregs_save_area[15]); - decompressor_printk("Last Breaking-Event-Address:\n"); - decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, - (void *)S390_lowcore.pgm_last_break); -} diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 0cf79826eef9..45e3d057cfaa 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt #include <linux/processor.h> #include <linux/errno.h> #include <linux/init.h> @@ -9,6 +10,7 @@ #include <asm/sections.h> #include <asm/setup.h> #include <asm/sclp.h> +#include <asm/asm.h> #include <asm/uv.h> #include "decompressor.h" #include "boot.h" @@ -27,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) return &physmem_info.online[n]; if (unlikely(!physmem_info.online_extended)) { physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( - RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, physmem_alloc_pos, true); } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; @@ -57,37 +59,25 @@ void add_physmem_online_range(u64 start, u64 end) static int __diag260(unsigned long rx1, unsigned long rx2) { - unsigned long reg1, reg2, ry; union register_pair rx; - psw_t old; - int rc; + int cc, exception; + unsigned long ry; rx.even = rx1; rx.odd = rx2; ry = 0x10; /* storage configuration */ - rc = -1; /* fail */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + exception = 1; + asm_inline volatile( " diag %[rx],%[ry],0x260\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [ry] "+&d" (ry), - "+Q" (S390_lowcore.program_new_psw), - "=Q" (old) - : [rx] "d" (rx.pair), - [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw) - : "cc", "memory"); - return rc == 0 ? ry : -1; + "0: lhi %[exc],0\n" + "1:\n" + CC_IPM(cc) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception), [ry] "+d" (ry) + : [rx] "d" (rx.pair) + : CC_CLOBBER_LIST("memory")); + cc = exception ? -1 : CC_TRANSFORM(cc); + return cc == 0 ? ry : -1; } static int diag260(void) @@ -109,33 +99,44 @@ static int diag260(void) return 0; } +#define DIAG500_SC_STOR_LIMIT 4 + +static int diag500_storage_limit(unsigned long *max_physmem_end) +{ + unsigned long storage_limit; + + asm_inline volatile( + " lghi %%r1,%[subcode]\n" + " lghi %%r2,0\n" + " diag %%r2,%%r4,0x500\n" + "0: lgr %[slimit],%%r2\n" + EX_TABLE(0b, 0b) + : [slimit] "=d" (storage_limit) + : [subcode] "i" (DIAG500_SC_STOR_LIMIT) + : "memory", "1", "2"); + if (!storage_limit) + return -EINVAL; + /* Convert inclusive end to exclusive end */ + *max_physmem_end = storage_limit + 1; + return 0; +} + static int tprot(unsigned long addr) { - unsigned long reg1, reg2; - int rc = -EFAULT; - psw_t old; - - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + int cc, exception; + + exception = 1; + asm_inline volatile( " tprot 0(%[addr]),0\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - "=Q" (S390_lowcore.program_new_psw.addr), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw), - [addr] "a" (addr) - : "cc", "memory"); - return rc; + "0: lhi %[exc],0\n" + "1:\n" + CC_IPM(cc) + EX_TABLE(0b, 1b) + : CC_OUT(cc, cc), [exc] "+d" (exception) + : [addr] "a" (addr) + : CC_CLOBBER_LIST("memory")); + cc = exception ? -EFAULT : CC_TRANSFORM(cc); + return cc; } static unsigned long search_mem_end(void) @@ -157,30 +158,48 @@ unsigned long detect_max_physmem_end(void) { unsigned long max_physmem_end = 0; - if (!sclp_early_get_memsize(&max_physmem_end)) { + if (!diag500_storage_limit(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_DIAG500_STOR_LIMIT; + } else if (!sclp_early_get_memsize(&max_physmem_end)) { physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; } else { max_physmem_end = search_mem_end(); physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); return max_physmem_end; } void detect_physmem_online_ranges(unsigned long max_physmem_end) { + unsigned long start, end; + int i; + if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; + } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { + unsigned long online_end; + + if (!sclp_early_get_memsize(&online_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + add_physmem_online_range(0, online_end); + } } else if (!diag260()) { physmem_info.info_source = MEM_DETECT_DIAG260; } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); } void physmem_set_usable_limit(unsigned long limit) { physmem_info.usable = limit; physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); } static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) @@ -190,38 +209,47 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, enum reserved_range_type t; int i; - decompressor_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - decompressor_printk("Kernel command line: %s\n", early_command_line); - decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", - size, align, min, max); - decompressor_printk("Reserved memory ranges:\n"); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { - decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n", - get_physmem_info_source(), physmem_info.info_source); + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", + get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { - decompressor_printk("%016lx %016lx\n", start, end); + boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", - total_mem, total_reserved_mem, - total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); - sclp_early_printk("\n\n -- System halted\n"); + boot_emerg(" -- System halted\n"); disabled_wait(); } -void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) { physmem_info.reserved[type].start = addr; physmem_info.reserved[type].end = addr + size; } +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + void physmem_free(enum reserved_range_type type) { + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); physmem_info.reserved[type].start = 0; physmem_info.reserved[type].end = 0; } @@ -288,41 +316,73 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s max = min(max, physmem_alloc_pos); addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); if (addr) - physmem_reserve(type, addr, size); + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); return addr; } -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; - struct reserved_range *new_range; + struct reserved_range *new_range = NULL; unsigned int ranges_left; unsigned long addr; addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, - &ranges_left, true); + &ranges_left, die_on_oom); + if (!addr) + return 0; /* if not a consecutive allocation of the same type or first allocation */ if (range->start != addr + size) { if (range->end) { - physmem_alloc_pos = __physmem_alloc_range( - sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, - physmem_alloc_ranges, &ranges_left, true); - new_range = (struct reserved_range *)physmem_alloc_pos; + addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, die_on_oom); + if (!addr) + return 0; *new_range = *range; range->chain = new_range; - addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, - ranges_left, &ranges_left, true); } range->end = addr + size; } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } range->start = addr; physmem_alloc_pos = addr; physmem_alloc_ranges = ranges_left; return addr; } +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; } + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c new file mode 100644 index 000000000000..4bb6bc95704e --- /dev/null +++ b/arch/s390/boot/printk.c @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/stdarg.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/sections.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/timex.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "boot.h" + +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; +bool __bootdata(bootdebug); + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + avail = sizeof(boot_rb) - boot_rb_off - 1; + strscpy(boot_rb + boot_rb_off, str, avail); + boot_rb_off += len + 1; +} + +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + +const char hex_asc[] = "0123456789abcdef"; + +static char *as_hex(char *dst, unsigned long val, int pad) +{ + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + + for (*p-- = '\0'; p >= dst; val >>= 4) + *p-- = hex_asc[val & 0x0f]; + return dst; +} + +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad, bool decimal) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; +} + +static char *symstart(char *p) +{ + while (*p) + p--; + return p + 1; +} + +static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned short *len) +{ + /* symbol entries are in a form "10000 c4 startup\0" */ + char *a = _decompressor_syms_start; + char *b = _decompressor_syms_end; + unsigned long start; + unsigned long size; + char *pivot; + char *endp; + + while (a < b) { + pivot = symstart(a + (b - a) / 2); + start = simple_strtoull(pivot, &endp, 16); + size = simple_strtoull(endp + 1, &endp, 16); + if (ip < start) { + b = pivot; + continue; + } + if (ip > start + size) { + a = pivot + strlen(pivot) + 1; + continue; + } + *off = ip - start; + *len = size; + return endp + 1; + } + return NULL; +} + +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) +{ + unsigned short off; + unsigned short len; + char *p; + + p = findsym((unsigned long)ip, &off, &len); + if (p) { + strscpy(buf, p, MAX_SYMLEN); + /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ + p = buf + strnlen(buf, MAX_SYMLEN - 15); + strscpy(p, "+0x", MAX_SYMLEN - (p - buf)); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); + } else { + as_hex(buf, (unsigned long)ip, 16); + } + return buf; +} + +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; + buf = printk_skip_level(buf); + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) + return; + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(buf); +} + +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + unsigned long ns = tod_to_ns(__get_tod_clock_monotonic()); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + +int boot_printk(const char *fmt, ...) +{ + char buf[1024] = { 0 }; + char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ + bool zero_pad, decimal; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; + va_list args; + char lenmod; + ssize_t len; + int pad; + + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); + + va_start(args, fmt); + for (; p < end && *fmt; fmt++) { + if (*fmt != '%') { + *p++ = *fmt; + continue; + } + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } + decimal = false; + switch (*fmt) { + case 's': + if (lenmod) + goto out; + strval = va_arg(args, char *); + zero_pad = false; + break; + case 'p': + if (*++fmt != 'S' || lenmod) + goto out; + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; + break; + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); + break; + case 'x': + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); + break; + default: + goto out; + } + len = strpad(p, end - p, strval, pad, zero_pad, decimal); + if (len == -E2BIG) + break; + p += len; + } +out: + va_end(args); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } + return len; +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 6cf89314209a..da8337e63a3e 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,12 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include <linux/string.h> #include <linux/elf.h> #include <asm/page-states.h> #include <asm/boot_data.h> +#include <asm/extmem.h> #include <asm/sections.h> +#include <asm/diag288.h> #include <asm/maccess.h> +#include <asm/machine.h> +#include <asm/sysinfo.h> #include <asm/cpu_mf.h> #include <asm/setup.h> +#include <asm/timex.h> #include <asm/kasan.h> #include <asm/kexec.h> #include <asm/sclp.h> @@ -18,7 +24,7 @@ #include "boot.h" #include "uv.h" -unsigned long __bootdata_preserved(__kaslr_offset); +struct vm_layout __bootdata_preserved(vm_layout); unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata_preserved(__memcpy_real_area); pte_t *__bootdata_preserved(memcpy_real_ptep); @@ -29,61 +35,131 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); -unsigned long __bootdata(ident_map_size); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); +union tod_clock __bootdata_preserved(tod_clock_base); +u64 __bootdata_preserved(clock_comparator_max) = -1UL; u64 __bootdata_preserved(stfle_fac_list[16]); -u64 __bootdata_preserved(alt_stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); -struct machine_info machine; - void error(char *x) { - sclp_early_printk("\n\n"); - sclp_early_printk(x); - sclp_early_printk("\n\n -- System halted"); - + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } +static char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + set_machine_feature(MFEATURE_LPAR); + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + /* Detect known hypervisors */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + set_machine_feature(MFEATURE_KVM); + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) + set_machine_feature(MFEATURE_VM); +} + +static void detect_diag288(void) +{ + /* "BEGIN" in EBCDIC character set */ + static const char cmd[] = "\xc2\xc5\xc7\xc9\xd5"; + unsigned long action, len; + + action = machine_is_vm() ? (unsigned long)cmd : LPARWDT_RESTART; + len = machine_is_vm() ? sizeof(cmd) : 0; + if (__diag288(WDT_FUNC_INIT, MIN_INTERVAL, action, len)) + return; + __diag288(WDT_FUNC_CANCEL, 0, 0, 0); + set_machine_feature(MFEATURE_DIAG288); +} + +static void detect_diag9c(void) +{ + unsigned int cpu; + int rc = 1; + + cpu = stap(); + asm_inline volatile( + " diag %[cpu],%%r0,0x9c\n" + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc) + : [cpu] "d" (cpu) + : "cc", "memory"); + if (!rc) + set_machine_feature(MFEATURE_DIAG9C); +} + +static void reset_tod_clock(void) +{ + union tod_clock clk; + + if (store_tod_clock_ext_cc(&clk) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) + disabled_wait(); + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; + get_lowcore()->last_update_clock = TOD_UNIX_EPOCH; +} + static void detect_facilities(void) { - if (test_facility(8)) { - machine.has_edat1 = 1; + if (cpu_has_edat1()) local_ctl_set_bit(0, CR0_EDAT_BIT); + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!cpu_has_nx()) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) + set_machine_feature(MFEATURE_PCI_MIO); + reset_tod_clock(); + if (test_facility(139) && (tod_clock_base.tod >> 63)) { + /* Enable signed clock comparator comparisons */ + set_machine_feature(MFEATURE_SCC); + clock_comparator_max = -1UL >> 1; + local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT); } - if (test_facility(78)) - machine.has_edat2 = 1; - if (test_facility(130)) - machine.has_nx = 1; + if (test_facility(50) && test_facility(73)) { + set_machine_feature(MFEATURE_TX); + local_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT); + } + if (cpu_has_vx()) + local_ctl_set_bit(0, CR0_VECTOR_BIT); } static int cmma_test_essa(void) { - unsigned long reg1, reg2, tmp = 0; + unsigned long tmp = 0; int rc = 1; - psw_t old; /* Test ESSA_GET_STATE */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" + asm_inline volatile( " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n" - " la %[rc],0\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [tmp] "=&d" (tmp), - "+Q" (S390_lowcore.program_new_psw), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw), - [cmd] "i" (ESSA_GET_STATE) + "0: lhi %[rc],0\n" + "1:\n" + EX_TABLE(0b, 1b) + : [rc] "+d" (rc), [tmp] "+d" (tmp) + : [cmd] "i" (ESSA_GET_STATE) : "cc", "memory"); return rc; } @@ -102,16 +178,26 @@ static void cmma_init(void) static void setup_lpp(void) { - S390_lowcore.current_pid = 0; - S390_lowcore.lpp = LPP_MAGIC; + get_lowcore()->current_pid = 0; + get_lowcore()->lpp = LPP_MAGIC; if (test_facility(40)) - lpp(&S390_lowcore.lpp); + lpp(&get_lowcore()->lpp); } #ifdef CONFIG_KERNEL_UNCOMPRESSED -unsigned long mem_safe_offset(void) +static unsigned long mem_safe_offset(void) { - return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; + return (unsigned long)_compressed_start; +} + +static void deploy_kernel(void *output) +{ + void *uncompressed_start = (void *)_compressed_start; + + if (output == uncompressed_start) + return; + memmove(output, uncompressed_start, vmlinux.image_size); + memset(uncompressed_start, 0, vmlinux.image_size); } #endif @@ -127,7 +213,7 @@ static void rescue_initrd(unsigned long min, unsigned long max) return; old_addr = addr; physmem_free(RR_INITRD); - addr = physmem_alloc_top_down(RR_INITRD, size, 0); + addr = physmem_alloc_or_die(RR_INITRD, size, 0); memmove((void *)addr, (void *)old_addr, size); } @@ -141,67 +227,15 @@ static void copy_bootdata(void) memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size); } -#ifdef CONFIG_PIE_BUILD -static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset) -{ - Elf64_Rela *rela_start, *rela_end, *rela; - int r_type, r_sym, rc; - Elf64_Addr loc, val; - Elf64_Sym *dynsym; - - rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start; - rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end; - dynsym = (Elf64_Sym *) vmlinux.dynsym_start; - for (rela = rela_start; rela < rela_end; rela++) { - loc = rela->r_offset + offset; - val = rela->r_addend; - r_sym = ELF64_R_SYM(rela->r_info); - if (r_sym) { - if (dynsym[r_sym].st_shndx != SHN_UNDEF) - val += dynsym[r_sym].st_value + offset; - } else { - /* - * 0 == undefined symbol table index (STN_UNDEF), - * used for R_390_RELATIVE, only add KASLR offset - */ - val += offset; - } - r_type = ELF64_R_TYPE(rela->r_info); - rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); - if (rc) - error("Unknown relocation type"); - } -} - -static void kaslr_adjust_got(unsigned long offset) {} -static void rescue_relocs(void) {} -static void free_relocs(void) {} -#else -static int *vmlinux_relocs_64_start; -static int *vmlinux_relocs_64_end; - -static void rescue_relocs(void) -{ - unsigned long size = __vmlinux_relocs_64_end - __vmlinux_relocs_64_start; - - vmlinux_relocs_64_start = (void *)physmem_alloc_top_down(RR_RELOC, size, 0); - vmlinux_relocs_64_end = (void *)vmlinux_relocs_64_start + size; - memmove(vmlinux_relocs_64_start, __vmlinux_relocs_64_start, size); -} - -static void free_relocs(void) -{ - physmem_free(RR_RELOC); -} - -static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset) +static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, + unsigned long offset, unsigned long phys_offset) { int *reloc; long loc; /* Adjust R_390_64 relocations */ - for (reloc = vmlinux_relocs_64_start; reloc < vmlinux_relocs_64_end; reloc++) { - loc = (long)*reloc + offset; + for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) { + loc = (long)*reloc + phys_offset; if (loc < min_addr || loc > max_addr) error("64-bit relocation outside of kernel!\n"); *(u64 *)loc += offset; @@ -213,24 +247,29 @@ static void kaslr_adjust_got(unsigned long offset) u64 *entry; /* - * Even without -fPIE, Clang still uses a global offset table for some - * reason. Adjust the GOT entries. + * Adjust GOT entries, except for ones for undefined weak symbols + * that resolved to zero. This also skips the first three reserved + * entries on s390x that are zero. */ - for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) - *entry += offset; + for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { + if (*entry) + *entry += offset; + } } -#endif /* * Merge information from several sources into a single ident_map_size value. * "ident_map_size" represents the upper limit of physical memory we may ever * reach. It might not be all online memory, but also include standby (offline) - * memory. "ident_map_size" could be lower then actual standby or even online + * memory or memory areas reserved for other means (e.g., memory devices such as + * virtio-mem). + * + * "ident_map_size" could be lower then actual standby/reserved or even online * memory present, due to limiting factors. We should never go above this limit. * It is the size of our identity mapping. * * Consider the following factors: - * 1. max_physmem_end - end of physical memory online or standby. + * 1. max_physmem_end - end of physical memory online, standby or reserved. * Always >= end of the last online memory range (get_physmem_online_end()). * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the * kernel is able to support. @@ -253,17 +292,40 @@ static void setup_ident_map_size(unsigned long max_physmem_end) if (oldmem_data.start) { __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { __kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } } #endif + boot_debug("Identity map size: 0x%016lx\n", ident_map_size); } -static unsigned long setup_kernel_memory_layout(void) +#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) + +static unsigned long get_vmem_size(unsigned long identity_size, + unsigned long vmemmap_size, + unsigned long vmalloc_size, + unsigned long rte_size) +{ + unsigned long max_mappable, vsize; + + max_mappable = max(identity_size, MAX_DCSS_ADDR); + vsize = round_up(SZ_2G + max_mappable, rte_size) + + round_up(vmemmap_size, rte_size) + + FIXMAP_SIZE + MODULES_LEN + KASLR_LEN; + if (IS_ENABLED(CONFIG_KMSAN)) + vsize += MODULES_LEN * 2; + return size_add(vsize, vmalloc_size); +} + +static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) { unsigned long vmemmap_start; + unsigned long kernel_start; unsigned long asce_limit; unsigned long rte_size; unsigned long pages; @@ -275,12 +337,20 @@ static unsigned long setup_kernel_memory_layout(void) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + - MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE; - vsize = size_add(vsize, vmalloc_size); - if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) { + BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); + BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); + vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); + if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || + (vsize > _REGION2_SIZE && kaslr_enabled())) { asce_limit = _REGION1_SIZE; - rte_size = _REGION2_SIZE; + if (__NO_KASLR_END_KERNEL > _REGION2_SIZE) { + rte_size = _REGION2_SIZE; + vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION2_SIZE); + } else { + rte_size = _REGION3_SIZE; + } } else { asce_limit = _REGION2_SIZE; rte_size = _REGION3_SIZE; @@ -290,38 +360,93 @@ static unsigned long setup_kernel_memory_layout(void) * Forcing modules and vmalloc area under the ultravisor * secure storage limit, so that any vmalloc allocation * we do could be used to back secure guest storage. + * + * Assume the secure storage limit always exceeds _REGION2_SIZE, + * otherwise asce_limit and rte_size would have been adjusted. */ vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax); #ifdef CONFIG_KASAN + BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE); - __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, - sizeof(struct lowcore)); - MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); + vsize = min(vsize, vmax); + if (kaslr_enabled()) { + unsigned long kernel_end, kaslr_len, slots, pos; + + kaslr_len = max(KASLR_LEN, vmax - vsize); + slots = DIV_ROUND_UP(kaslr_len - kernel_size, THREAD_SIZE); + if (get_random(slots, &pos)) + pos = 0; + kernel_end = vmax - pos * THREAD_SIZE; + kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_size + kernel_size); + } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { + kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); + } else { + kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); + } + __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); + + MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; + if (IS_ENABLED(CONFIG_KMSAN)) + VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ - vsize = round_down(VMALLOC_END / 2, _SEGMENT_SIZE); + vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; + vsize = round_down(vsize, _SEGMENT_SIZE); vmalloc_size = min(vmalloc_size, vsize); + if (IS_ENABLED(CONFIG_KMSAN)) { + /* take 2/3 of vmalloc area for KMSAN shadow and origins */ + vmalloc_size = round_down(vmalloc_size / 3, _SEGMENT_SIZE); + VMALLOC_END -= vmalloc_size * 2; + } VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); + + __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); + __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, + sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); /* split remaining virtual space between 1:1 mapping & vmemmap array */ - pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); pages = SECTION_ALIGN_UP(pages); /* keep vmemmap_start aligned to a top level region table entry */ - vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); - vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); - /* maximum mappable address as seen by arch_get_mappable_range() */ - max_mappable = vmemmap_start; + vmemmap_start = round_down(__abs_lowcore - pages * sizeof(struct page), rte_size); /* make sure identity map doesn't overlay with vmemmap */ ident_map_size = min(ident_map_size, vmemmap_start); vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); - /* make sure vmemmap doesn't overlay with vmalloc area */ - VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START); + /* make sure vmemmap doesn't overlay with absolute lowcore area */ + if (vmemmap_start + vmemmap_size > __abs_lowcore) { + vmemmap_size = SECTION_ALIGN_DOWN(ident_map_size / PAGE_SIZE) * sizeof(struct page); + ident_map_size = vmemmap_size / sizeof(struct page) * PAGE_SIZE; + } vmemmap = (struct page *)vmemmap_start; + /* maximum address for which linear mapping could be created (DCSS, memory) */ + BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); + max_mappable = max(ident_map_size, MAX_DCSS_ADDR); + max_mappable = min(max_mappable, vmemmap_start); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); return asce_limit; } @@ -329,9 +454,9 @@ static unsigned long setup_kernel_memory_layout(void) /* * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. */ -static void clear_bss_section(unsigned long vmlinux_lma) +static void clear_bss_section(unsigned long kernel_start) { - memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size); + memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size); } /* @@ -348,22 +473,17 @@ static void setup_vmalloc_size(void) vmalloc_size = max(size, vmalloc_size); } -static void kaslr_adjust_vmlinux_info(unsigned long offset) +static void kaslr_adjust_vmlinux_info(long offset) { - *(unsigned long *)(&vmlinux.entry) += offset; vmlinux.bootdata_off += offset; vmlinux.bootdata_preserved_off += offset; -#ifdef CONFIG_PIE_BUILD - vmlinux.rela_dyn_start += offset; - vmlinux.rela_dyn_end += offset; - vmlinux.dynsym_start += offset; -#else vmlinux.got_start += offset; vmlinux.got_end += offset; -#endif vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; + vmlinux.alt_instructions += offset; + vmlinux.alt_instructions_end += offset; #ifdef CONFIG_KASAN vmlinux.kasan_early_shadow_page_off += offset; vmlinux.kasan_early_shadow_pte_off += offset; @@ -375,21 +495,32 @@ static void kaslr_adjust_vmlinux_info(unsigned long offset) void startup_kernel(void) { + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0; + unsigned long kernel_size = TEXT_OFFSET + vmlinux_size; + unsigned long kaslr_large_page_offset; unsigned long max_physmem_end; - unsigned long vmlinux_lma = 0; - unsigned long amode31_lma = 0; unsigned long asce_limit; unsigned long safe_addr; - void *img; psw_t psw; setup_lpp(); - safe_addr = mem_safe_offset(); + store_ipl_parmblock(); + uv_query_info(); + setup_boot_command_line(); + parse_boot_command_line(); /* - * Reserve decompressor memory together with decompression heap, buffer and - * memory which might be occupied by uncompressed kernel at default 1Mb - * position (if KASLR is off or failed). + * Non-randomized kernel physical start address must be _SEGMENT_SIZE + * aligned (see blow). + */ + nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); + safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size); + + /* + * Reserve decompressor memory together with decompression heap, + * buffer and memory which might be occupied by uncompressed kernel + * (if KASLR is off or failed). */ physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) @@ -397,52 +528,82 @@ void startup_kernel(void) oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - store_ipl_parmblock(); read_ipl_report(); - uv_query_info(); sclp_early_read_info(); - setup_boot_command_line(); - parse_boot_command_line(); + sclp_early_detect_machine_features(); detect_facilities(); + detect_diag9c(); + detect_machine_type(); + /* detect_diag288() needs machine type */ + detect_diag288(); cmma_init(); sanitize_prot_virt_host(); max_physmem_end = detect_max_physmem_end(); setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); - asce_limit = setup_kernel_memory_layout(); + asce_limit = setup_kernel_memory_layout(kernel_size); /* got final ident_map_size, physmem allocations could be performed now */ physmem_set_usable_limit(ident_map_size); detect_physmem_online_ranges(max_physmem_end); save_ipl_cert_comp_list(); rescue_initrd(safe_addr, ident_map_size); - rescue_relocs(); + /* + * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower + * 20 bits (the offset within a large page) are zero. Copy the last + * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to + * __kaslr_offset_phys. + * + * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset + * are identical, which is required to allow for large mappings of the + * kernel image. + */ + kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; if (kaslr_enabled()) { - vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size, - THREAD_SIZE, vmlinux.default_lma, - ident_map_size); - if (vmlinux_lma) { - __kaslr_offset = vmlinux_lma - vmlinux.default_lma; - kaslr_adjust_vmlinux_info(__kaslr_offset); - } - } - vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma; - physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size); - - if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { - img = decompress_kernel(); - memmove((void *)vmlinux_lma, img, vmlinux.image_size); - } else if (__kaslr_offset) { - img = (void *)vmlinux.default_lma; - memmove((void *)vmlinux_lma, img, vmlinux.image_size); - memset(img, 0, vmlinux.image_size); + unsigned long size = vmlinux_size + kaslr_large_page_offset; + + text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size); } + if (!text_lma) + text_lma = nokaslr_text_lma; + text_lma |= kaslr_large_page_offset; + + /* + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is + * never accessed via the kernel image mapping as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, this region could be used for something else and does + * not need to be reserved. See how it is skipped in setup_vmem(). + */ + __kaslr_offset_phys = text_lma - TEXT_OFFSET; + kaslr_adjust_vmlinux_info(__kaslr_offset_phys); + physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size); + deploy_kernel((void *)text_lma); /* vmlinux decompression is done, shrink reserved low memory */ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); - if (kaslr_enabled()) - amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); - amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size; + + /* + * In case KASLR is enabled the randomized location of .amode31 + * section might overlap with .vmlinux.relocs section. To avoid that + * the below randomize_within_range() could have been called with + * __vmlinux_relocs_64_end as the lower range address. However, + * .amode31 section is written to by the decompressed kernel - at + * that time the contents of .vmlinux.relocs is not needed anymore. + * Conversely, .vmlinux.relocs is read only by the decompressor, even + * before the kernel started. Therefore, in case the two sections + * overlap there is no risk of corrupting any data. + */ + if (kaslr_enabled()) { + unsigned long amode31_min; + + amode31_min = (unsigned long)_decompressor_end; + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); + } + if (!amode31_lma) + amode31_lma = text_lma - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); /* @@ -458,23 +619,28 @@ void startup_kernel(void) * - copy_bootdata() must follow setup_vmem() to propagate changes * to bootdata made by setup_vmem() */ - clear_bss_section(vmlinux_lma); - kaslr_adjust_relocs(vmlinux_lma, vmlinux_lma + vmlinux.image_size, __kaslr_offset); + clear_bss_section(text_lma); + kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size, + __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); - free_relocs(); - setup_vmem(asce_limit); + setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); copy_bootdata(); + __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, + (struct alt_instr *)_vmlinux_info.alt_instructions_end, + ALT_CTX_EARLY); /* * Save KASLR offset for early dumps, before vmcore_info is set. * Mark as uneven to distinguish from real vmcore_info pointer. */ - S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0; + get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0; /* * Jump to the decompressed kernel entry point and switch DAT mode on. */ - psw.addr = vmlinux.entry; + psw.addr = __kaslr_offset + vmlinux.entry; psw.mask = PSW_KERNEL_BITS; + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); __load_psw(psw); } diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index faccb33b462c..bd68161434a6 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -1,11 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#define IN_BOOT_STRING_C 1 #include <linux/ctype.h> #include <linux/kernel.h> #include <linux/errno.h> #undef CONFIG_KASAN #undef CONFIG_KASAN_GENERIC +#undef CONFIG_KMSAN #include "../lib/string.c" +/* + * Duplicate some functions from the common lib/string.c + * instead of fully including it. + */ + int strncmp(const char *cs, const char *ct, size_t count) { unsigned char c1, c2; @@ -22,6 +29,27 @@ int strncmp(const char *cs, const char *ct, size_t count) return 0; } +ssize_t sized_strscpy(char *dst, const char *src, size_t count) +{ + size_t len; + + if (count == 0) + return -E2BIG; + len = strnlen(src, count - 1); + memcpy(dst, src, len); + dst[len] = '\0'; + return src[len] ? -E2BIG : len; +} + +void *memset64(uint64_t *s, uint64_t v, size_t count) +{ + uint64_t *xs = s; + + while (count--) + *xs++ = v; + return s; +} + char *skip_spaces(const char *str) { while (isspace(*str)) diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 1e66d2cbb096..4568e8f81dac 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -8,12 +8,8 @@ #include "uv.h" /* will be used in arch/s390/kernel/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); -#endif -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); -#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) @@ -26,8 +22,8 @@ void uv_query_info(void) if (!test_facility(158)) return; - /* rc==0x100 means that there is additional data we do not process */ - if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) + /* Ignore that there might be more data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != UVC_RC_MORE_DATA) return; if (IS_ENABLED(CONFIG_KVM)) { @@ -50,17 +46,15 @@ void uv_query_info(void) uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver; uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf; uv_info.supp_secret_types = uvcb.supp_secret_types; - uv_info.max_secrets = uvcb.max_secrets; + uv_info.max_assoc_secrets = uvcb.max_assoc_secrets; + uv_info.max_retr_secrets = uvcb.max_retr_secrets; } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; -#endif } -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit) { if (is_prot_virt_host() && uv_info.max_sec_stor_addr) @@ -92,4 +86,3 @@ void sanitize_prot_virt_host(void) { prot_virt_host = is_prot_virt_host_capable(); } -#endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h index 0f3070856f8d..da4a4a8d48e0 100644 --- a/arch/s390/boot/uv.h +++ b/arch/s390/boot/uv.h @@ -2,21 +2,8 @@ #ifndef BOOT_UV_H #define BOOT_UV_H -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit); void sanitize_prot_virt_host(void); -#else -static inline unsigned long adjust_to_uv_max(unsigned long limit) -{ - return limit; -} -static inline void sanitize_prot_virt_host(void) {} -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) void uv_query_info(void); -#else -static inline void uv_query_info(void) {} -#endif #endif /* BOOT_UV_H */ diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 09b10bb6e4d0..1d073acd05a7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt +#include <linux/cpufeature.h> #include <linux/sched/task.h> #include <linux/pgtable.h> #include <linux/kasan.h> @@ -9,10 +11,12 @@ #include <asm/ctlreg.h> #include <asm/physmem_info.h> #include <asm/maccess.h> +#include <asm/machine.h> #include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS @@ -26,14 +30,47 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); enum populate_mode { POPULATE_NONE, POPULATE_DIRECT, + POPULATE_LOWCORE, POPULATE_ABS_LOWCORE, + POPULATE_IDENTITY, + POPULATE_KERNEL, #ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ POPULATE_KASAN_MAP_SHADOW, POPULATE_KASAN_ZERO_SHADOW, POPULATE_KASAN_SHALLOW #endif }; +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); #ifdef CONFIG_KASAN @@ -49,24 +86,24 @@ static pte_t pte_z; static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) { - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - pgtable_populate(start, end, mode); + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); } -static void kasan_populate_shadow(void) +static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) { pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); unsigned long memgap_start = 0; - unsigned long untracked_end; unsigned long start, end; int i; pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - if (!machine.has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); @@ -76,54 +113,26 @@ static void kasan_populate_shadow(void) __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat(kasan_early_shadow_pte, 1); - /* - * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan | - * | | / | zero page | - * +- vmalloc area -+ / | mapping | - * | vmalloc_size | / | (untracked) | - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ | unmapped | allocated per module - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - * - * Current memory layout (KASAN_VMALLOC): - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan zero page| (untracked) - * | | / | mapping | - * +- vmalloc area -+ / +----------------+ - * | vmalloc_size | / |shallow populate| - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ |shallow populate| - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - */ - for_each_physmem_usable_range(i, &start, &end) { - kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); - if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) - kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); + kasan_populate((unsigned long)__identity_va(start), + (unsigned long)__identity_va(end), + POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) { + kasan_populate((unsigned long)__identity_va(memgap_start), + (unsigned long)__identity_va(start), + POPULATE_KASAN_ZERO_SHADOW); + } memgap_start = end; } - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); - } else { - untracked_end = MODULES_VADDR; - } + kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); + kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); /* populate kasan shadow for untracked memory */ - kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); - kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); + kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START, + POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, @@ -180,7 +189,9 @@ static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) } #else -static inline void kasan_populate_shadow(void) {} +static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) +{ +} static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, unsigned long end, enum populate_mode mode) @@ -226,7 +237,7 @@ static void *boot_crst_alloc(unsigned long val) unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); crst_table_init(table, val); __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; @@ -242,7 +253,7 @@ static pte_t *boot_pte_alloc(void) * during POPULATE_KASAN_MAP_SHADOW when EDAT is off */ if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; __arch_set_page_dat(pte, 1); } else { @@ -254,36 +265,82 @@ static pte_t *boot_pte_alloc(void) return pte; } -static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) { switch (mode) { case POPULATE_NONE: - return -1; + return INVALID_PHYS_ADDR; case POPULATE_DIRECT: return addr; + case POPULATE_LOWCORE: + return __lowcore_pa(addr); case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); + case POPULATE_KERNEL: + return __kernel_pa(addr); + case POPULATE_IDENTITY: + return __identity_pa(addr); +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: + /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; +#endif + default: + return INVALID_PHYS_ADDR; + } +} + +static bool large_page_mapping_allowed(enum populate_mode mode) +{ + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_top_down(RR_VMEM, size, size); - memset((void *)addr, 0, size); - return addr; #endif + return true; default: - return -1; + return false; } } -static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end) +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - return machine.has_edat2 && - IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE; + unsigned long pa, size = end - addr; + + if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } -static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - return machine.has_edat1 && - IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE; + unsigned long pa, size = end - addr; + + if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, @@ -297,22 +354,20 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (pte_none(*pte)) { if (kasan_pte_populate_zero_shadow(pte, mode)) continue; - entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); - if (!machine.has_nx) - entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_4K, pages); } static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pmd_t *pmd, entry; pte_t *pte; @@ -322,11 +377,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (pmd_none(*pmd)) { if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) continue; - if (can_large_pmd(pmd, addr, next)) { - entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); entry = set_pmd_bit(entry, SEGMENT_KERNEL); - if (!machine.has_nx) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -338,14 +392,14 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e } pgtable_pte_populate(pmd, addr, next, mode); } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_1M, pages); } static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pud_t *pud, entry; pmd_t *pmd; @@ -355,11 +409,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (pud_none(*pud)) { if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) continue; - if (can_large_pud(pud, addr, next)) { - entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); entry = set_pud_bit(entry, REGION3_KERNEL); - if (!machine.has_nx) - entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; @@ -371,7 +424,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e } pgtable_pmd_populate(pud, addr, next, mode); } - if (mode == POPULATE_DIRECT) + if (mode == POPULATE_IDENTITY) update_page_count(PG_DIRECT_MAP_2G, pages); } @@ -401,6 +454,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat pgd_t *pgd; p4d_t *p4d; + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + pgd = pgd_offset(&init_mm, addr); for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); @@ -418,11 +478,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat } } -void setup_vmem(unsigned long asce_limit) +void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit) { + unsigned long lowcore_address = 0; unsigned long start, end; unsigned long asce_type; unsigned long asce_bits; + pgd_t *init_mm_pgd; int i; /* @@ -433,6 +495,15 @@ void setup_vmem(unsigned long asce_limit) for_each_physmem_online_range(i, &start, &end) __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); + /* + * init_mm->pgd contains virtual address of swapper_pg_dir. + * It is unusable at this stage since DAT is yet off. Swap + * it for physical address of swapper_pg_dir and restore + * the virtual address after all page tables are created. + */ + init_mm_pgd = init_mm.pgd; + init_mm.pgd = (pgd_t *)swapper_pg_dir; + if (asce_limit == _REGION1_SIZE) { asce_type = _REGION2_ENTRY_EMPTY; asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; @@ -447,28 +518,49 @@ void setup_vmem(unsigned long asce_limit) __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); + if (machine_has_relocated_lowcore()) + lowcore_address = LOWCORE_ALT_ADDRESS; + /* * To allow prefixing the lowcore must be mapped with 4KB pages. * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); - for_each_physmem_usable_range(i, &start, &end) - pgtable_populate(start, end, POPULATE_DIRECT); + pgtable_populate(lowcore_address, + lowcore_address + sizeof(struct lowcore), + POPULATE_LOWCORE); + for_each_physmem_usable_range(i, &start, &end) { + pgtable_populate((unsigned long)__identity_va(start), + (unsigned long)__identity_va(end), + POPULATE_IDENTITY); + } + + /* + * [kernel_start..kernel_start + TEXT_OFFSET] region is never + * accessed as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region. + */ + pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL); + pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE, POPULATE_NONE); - memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); + memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area)); - kasan_populate_shadow(); + kasan_populate_shadow(kernel_start, kernel_end); - S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits; - S390_lowcore.user_asce = s390_invalid_asce; + get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits; + get_lowcore()->user_asce = s390_invalid_asce; - local_ctl_load(1, &S390_lowcore.kernel_asce); - local_ctl_load(7, &S390_lowcore.user_asce); - local_ctl_load(13, &S390_lowcore.kernel_asce); + local_ctl_load(1, &get_lowcore()->kernel_asce); + local_ctl_load(7, &get_lowcore()->user_asce); + local_ctl_load(13, &get_lowcore()->kernel_asce); - init_mm.context.asce = S390_lowcore.kernel_asce.val; + init_mm.context.asce = get_lowcore()->kernel_asce.val; + init_mm.pgd = init_mm_pgd; } diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index 3d7ea585ab99..50988022f9ea 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS *(.rodata.*) _erodata = . ; } + EXCEPTION_TABLE(16) .got : { *(.got) } @@ -99,8 +100,22 @@ SECTIONS _decompressor_end = .; + . = ALIGN(4); + .vmlinux.relocs : { + __vmlinux_relocs_64_start = .; + *(.vmlinux.relocs_64) + __vmlinux_relocs_64_end = .; + } + #ifdef CONFIG_KERNEL_UNCOMPRESSED - . = 0x100000; + . = ALIGN(PAGE_SIZE); + . += AMODE31_SIZE; /* .amode31 section */ + + /* + * Make sure the location counter is not less than TEXT_OFFSET. + * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead. + */ + . = MAX(TEXT_OFFSET, ALIGN(1 << 20)); #else . = ALIGN(8); #endif @@ -110,24 +125,6 @@ SECTIONS _compressed_end = .; } -#ifndef CONFIG_PIE_BUILD - /* - * When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire - * uncompressed vmlinux.bin is positioned in the bzImage decompressor - * image at the default kernel LMA of 0x100000, enabling it to be - * executed in-place. However, the size of .vmlinux.relocs could be - * large enough to cause an overlap with the uncompressed kernel at the - * address 0x100000. To address this issue, .vmlinux.relocs is - * positioned after the .rodata.compressed. - */ - . = ALIGN(4); - .vmlinux.relocs : { - __vmlinux_relocs_64_start = .; - *(.vmlinux.relocs_64) - __vmlinux_relocs_64_end = .; - } -#endif - #define SB_TRAILER_SIZE 32 /* Trailer needed for Secure Boot */ . += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */ @@ -169,7 +166,6 @@ SECTIONS /DISCARD/ : { COMMON_DISCARDS *(.eh_frame) - *(__ex_table) *(*__ksymtab*) *(___kcrctab*) } |