diff options
Diffstat (limited to 'arch/s390/kernel/setup.c')
-rw-r--r-- | arch/s390/kernel/setup.c | 598 |
1 files changed, 243 insertions, 355 deletions
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b2c2f75860e8..ab19ddb09d65 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -37,7 +37,7 @@ #include <linux/root_dev.h> #include <linux/console.h> #include <linux/kernel_stat.h> -#include <linux/dma-contiguous.h> +#include <linux/dma-map-ops.h> #include <linux/device.h> #include <linux/notifier.h> #include <linux/pfn.h> @@ -49,6 +49,8 @@ #include <linux/memory.h> #include <linux/compat.h> #include <linux/start_kernel.h> +#include <linux/hugetlb.h> +#include <linux/kmemleak.h> #include <asm/boot_data.h> #include <asm/ipl.h> @@ -56,7 +58,7 @@ #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/cpcmd.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/nmi.h> #include <asm/irq.h> #include <asm/page.h> @@ -72,7 +74,9 @@ #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/mem_detect.h> +#include <asm/maccess.h> #include <asm/uv.h> +#include <asm/asm-offsets.h> #include "entry.h" /* @@ -87,32 +91,72 @@ EXPORT_SYMBOL(console_devno); unsigned int console_irq = -1; EXPORT_SYMBOL(console_irq); -unsigned long elf_hwcap __read_mostly = 0; -char elf_platform[ELF_PLATFORM_SIZE]; +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .amode31 section. + */ +unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31; +unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31; +unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31; +unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31; +struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; +struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; -unsigned long int_hwcap = 0; +/* + * Control registers CR2, CR5 and CR15 are initialized with addresses + * of tables that must be placed below 2G which is handled by the AMODE31 + * sections. + * Because the AMODE31 sections are relocated below 2G at startup, + * the content of control registers CR2, CR5 and CR15 must be updated + * with new addresses after the relocation. The initial initialization of + * control registers occurs in head64.S and then gets updated again after AMODE31 + * relocation. We must access the relevant AMODE31 tables indirectly via + * pointers placed in the .amode31.refs linker section. Those pointers get + * updated automatically during AMODE31 relocation and always contain a valid + * address within AMODE31 sections. + */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST -int __bootdata_preserved(prot_virt_guest); -#endif +static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64); + +static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = { + [1] = 0xffffffffffffffff +}; + +static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = { + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0 +}; + +static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = { + 0, 0, 0x89000000, 0, + 0, 0, 0x8a000000, 0 +}; + +static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31; +static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; +static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; +static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); -int __bootdata(memory_end_set); -unsigned long __bootdata(memory_end); -unsigned long __bootdata(vmalloc_size); -unsigned long __bootdata(max_physmem_end); +unsigned long __bootdata(ident_map_size); struct mem_detect_info __bootdata(mem_detect); +struct initrd_data __bootdata(initrd_data); -struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); -struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); -unsigned long __bootdata_preserved(__swsusp_reset_dma); -unsigned long __bootdata_preserved(__stext_dma); -unsigned long __bootdata_preserved(__etext_dma); -unsigned long __bootdata_preserved(__sdma); -unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); +unsigned long __bootdata(__amode31_base); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); +u64 __bootdata_preserved(stfle_fac_list[16]); +EXPORT_SYMBOL(stfle_fac_list); +u64 __bootdata_preserved(alt_stfle_fac_list[16]); +struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -122,6 +166,7 @@ EXPORT_SYMBOL(VMALLOC_END); struct page *vmemmap; EXPORT_SYMBOL(vmemmap); +unsigned long vmemmap_size; unsigned long MODULES_VADDR; unsigned long MODULES_END; @@ -130,6 +175,14 @@ unsigned long MODULES_END; struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); +DEFINE_STATIC_KEY_FALSE(cpu_has_bear); + +/* + * The Write Back bit position in the physaddr is given by the SLPC PCI. + * Leaving the mask zero always uses write through which is safe + */ +unsigned long mio_wb_bit_mask __ro_after_init; + /* * This is set up by the setup-routine at boot-time * for S390 need to find out, what we have to setup @@ -163,7 +216,7 @@ static void __init set_preferred_console(void) else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); else if (CONSOLE_IS_VT220) - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("ttysclp", 0, NULL); else if (CONSOLE_IS_HVC) add_preferred_console("hvc", 0, NULL); } @@ -248,9 +301,9 @@ static void __init conmode_default(void) #ifdef CONFIG_CRASH_DUMP static void __init setup_zfcpdump(void) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return; - if (OLDMEM_BASE) + if (oldmem_data.start) return; strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; @@ -303,17 +356,17 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL_GPL(pm_power_off); -void *restart_stack __section(.data); +void *restart_stack; unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long) - __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + void *ret; + + ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(ret); + return (unsigned long)ret; #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif @@ -339,24 +392,11 @@ int __init arch_early_irq_init(void) return 0; } -static int __init async_stack_realloc(void) -{ - unsigned long old, new; - - old = S390_lowcore.async_stack - STACK_INIT_OFFSET; - new = stack_alloc(); - if (!new) - panic("Couldn't allocate async stack"); - S390_lowcore.async_stack = new + STACK_INIT_OFFSET; - free_pages(old, THREAD_SIZE_ORDER); - return 0; -} -early_initcall(async_stack_realloc); - void __init arch_call_rest_init(void) { unsigned long stack; + smp_reinit_ipl_cpu(); stack = stack_alloc(); if (!stack) panic("Couldn't allocate kernel stack"); @@ -367,12 +407,18 @@ void __init arch_call_rest_init(void) set_task_stack_end_magic(current); stack += STACK_INIT_OFFSET; S390_lowcore.kernel_stack = stack; - CALL_ON_STACK_NORETURN(rest_init, stack); + call_on_stack_noreturn(rest_init, stack); } static void __init setup_lowcore_dat_off(void) { - struct lowcore *lc; + unsigned long int_psw_mask = PSW_KERNEL_BITS; + struct lowcore *abs_lc, *lc; + unsigned long mcck_stack; + unsigned long flags; + + if (IS_ENABLED(CONFIG_KASAN)) + int_psw_mask |= PSW_MASK_DAT; /* * Setup lowcore for boot cpu @@ -385,16 +431,15 @@ static void __init setup_lowcore_dat_off(void) lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; lc->nodat_stack = ((unsigned long) &init_thread_union) @@ -403,15 +448,8 @@ static void __init setup_lowcore_dat_off(void) lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->preempt_count = S390_lowcore.preempt_count; - lc->stfl_fac_list = S390_lowcore.stfl_fac_list; - memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, - sizeof(lc->stfle_fac_list)); - memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, - sizeof(lc->alt_stfle_fac_list)); - nmi_alloc_boot_cpu(lc); - vdso_alloc_boot_cpu(lc); - lc->sync_enter_timer = S390_lowcore.sync_enter_timer; - lc->async_enter_timer = S390_lowcore.async_enter_timer; + nmi_alloc_mcesa_early(&lc->mcesad); + lc->sys_enter_timer = S390_lowcore.sys_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; lc->user_timer = S390_lowcore.user_timer; lc->system_timer = S390_lowcore.system_timer; @@ -437,32 +475,55 @@ static void __init setup_lowcore_dat_off(void) lc->restart_stack = (unsigned long) restart_stack; lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; - lc->restart_source = -1UL; - - /* Setup absolute zero lowcore */ - mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); - mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); - mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data); - mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); - mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); + lc->restart_source = -1U; + + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_stack = lc->restart_stack; + abs_lc->restart_fn = lc->restart_fn; + abs_lc->restart_data = lc->restart_data; + abs_lc->restart_source = lc->restart_source; + abs_lc->restart_psw = lc->restart_psw; + abs_lc->mcesad = lc->mcesad; + put_abs_lowcore(abs_lc, flags); + + mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); + if (!mcck_stack) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, THREAD_SIZE, THREAD_SIZE); + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); - lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; - set_prefix((u32)(unsigned long) lc); + set_prefix(__pa(lc)); lowcore_ptr[0] = lc; } static void __init setup_lowcore_dat_on(void) { + struct lowcore *abs_lc; + unsigned long flags; + __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; __ctl_set_bit(0, 28); + __ctl_store(S390_lowcore.cregs_save_area, 0, 15); + if (abs_lowcore_map(0, lowcore_ptr[0], true)) + panic("Couldn't setup absolute lowcore"); + abs_lowcore_mapped = true; + abs_lc = get_abs_lowcore(&flags); + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; + abs_lc->program_new_psw = S390_lowcore.program_new_psw; + memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area, + sizeof(abs_lc->cregs_save_area)); + put_abs_lowcore(abs_lc, flags); } static struct resource code_resource = { @@ -489,8 +550,9 @@ static struct resource __initdata *standard_resources[] = { static void __init setup_resources(void) { struct resource *res, *std_res, *sub_res; - struct memblock_region *reg; + phys_addr_t start, end; int j; + u64 i; code_resource.start = (unsigned long) _text; code_resource.end = (unsigned long) _etext - 1; @@ -499,7 +561,7 @@ static void __init setup_resources(void) bss_resource.start = (unsigned long) __bss_start; bss_resource.end = (unsigned long) __bss_stop - 1; - for_each_memblock(memory, reg) { + for_each_mem_range(i, &start, &end) { res = memblock_alloc(sizeof(*res), 8); if (!res) panic("%s: Failed to allocate %zu bytes align=0x%x\n", @@ -507,8 +569,13 @@ static void __init setup_resources(void) res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; - res->start = reg->base; - res->end = reg->base + reg->size - 1; + res->start = start; + /* + * In memblock, end points to the first byte after the + * range while in resourses, end points to the last byte in + * the range. + */ + res->end = end - 1; request_resource(&iomem_resource, res); for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { @@ -539,7 +606,8 @@ static void __init setup_resources(void) * part of the System RAM resource. */ if (crashk_res.end) { - memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0); + memblock_add_node(crashk_res.start, resource_size(&crashk_res), + 0, MEMBLOCK_NONE); memblock_reserve(crashk_res.start, resource_size(&crashk_res)); insert_resource(&iomem_resource, &crashk_res); } @@ -548,56 +616,18 @@ static void __init setup_resources(void) static void __init setup_memory_end(void) { - unsigned long vmax, tmp; - - /* Choose kernel address space layout: 3 or 4 levels. */ - if (IS_ENABLED(CONFIG_KASAN)) { - vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) - ? _REGION1_SIZE - : _REGION2_SIZE; - } else { - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE); - if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) - vmax = _REGION2_SIZE; /* 3-level kernel page table */ - else - vmax = _REGION1_SIZE; /* 4-level kernel page table */ - } - - /* module area is at the end of the kernel address space. */ - MODULES_END = vmax; - MODULES_VADDR = MODULES_END - MODULES_LEN; - VMALLOC_END = MODULES_VADDR; - VMALLOC_START = VMALLOC_END - vmalloc_size; - - /* Split remaining virtual space between 1:1 mapping & vmemmap array */ - tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); - /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ - tmp = SECTION_ALIGN_UP(tmp); - tmp = VMALLOC_START - tmp * sizeof(struct page); - tmp &= ~((vmax >> 11) - 1); /* align to page table level */ - tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); - vmemmap = (struct page *) tmp; - - /* Take care that memory_end is set and <= vmemmap */ - memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); -#ifdef CONFIG_KASAN - /* fit in kasan shadow memory region between 1:1 and vmemmap */ - memory_end = min(memory_end, KASAN_SHADOW_START); - vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); -#endif - max_pfn = max_low_pfn = PFN_DOWN(memory_end); - memblock_remove(memory_end, ULONG_MAX); - - pr_notice("The maximum memory size is %luMB\n", memory_end >> 20); + memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size); + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); + pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } #ifdef CONFIG_CRASH_DUMP /* - * When kdump is enabled, we have to ensure that no memory from - * the area [0 - crashkernel memory size] and - * [crashk_res.start - crashk_res.end] is set offline. + * When kdump is enabled, we have to ensure that no memory from the area + * [0 - crashkernel memory size] is set offline - it will be exchanged with + * the crashkernel memory region when kdump is triggered. The crashkernel + * memory region can never get offlined (pages are unmovable). */ static int kdump_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -608,11 +638,7 @@ static int kdump_mem_notifier(struct notifier_block *nb, return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; - if (arg->start_pfn > PFN_DOWN(crashk_res.end)) - return NOTIFY_OK; - if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) - return NOTIFY_OK; - return NOTIFY_BAD; + return NOTIFY_OK; } static struct notifier_block kdump_mem_nb = { @@ -622,39 +648,6 @@ static struct notifier_block kdump_mem_nb = { #endif /* - * Make sure that the area behind memory_end is protected - */ -static void reserve_memory_end(void) -{ - if (memory_end_set) - memblock_reserve(memory_end, ULONG_MAX); -} - -/* - * Make sure that oldmem, where the dump is stored, is protected - */ -static void reserve_oldmem(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) - /* Forget all memory above the running kdump system */ - memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); -#endif -} - -/* - * Make sure that oldmem, where the dump is stored, is protected - */ -static void remove_oldmem(void) -{ -#ifdef CONFIG_CRASH_DUMP - if (OLDMEM_BASE) - /* Forget all memory above the running kdump system */ - memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX); -#endif -} - -/* * Reserve memory for kdump kernel to be loaded with kexec */ static void __init reserve_crashkernel(void) @@ -664,7 +657,7 @@ static void __init reserve_crashkernel(void) phys_addr_t low, high; int rc; - rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, &crash_base); crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); @@ -678,9 +671,9 @@ static void __init reserve_crashkernel(void) return; } - low = crash_base ?: OLDMEM_BASE; + low = crash_base ?: oldmem_data.start; high = low + crash_size; - if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) { + if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) { /* The crashkernel fits into OLDMEM, reuse OLDMEM */ crash_base = low; } else { @@ -694,8 +687,9 @@ static void __init reserve_crashkernel(void) return; } low = crash_base ?: low; - crash_base = memblock_find_in_range(low, high, crash_size, - KEXEC_CRASH_MEM_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, + KEXEC_CRASH_MEM_ALIGN, + low, high); } if (!crash_base) { @@ -704,10 +698,12 @@ static void __init reserve_crashkernel(void) return; } - if (register_memory_notifier(&kdump_mem_nb)) + if (register_memory_notifier(&kdump_mem_nb)) { + memblock_phys_free(crash_base, crash_size); return; + } - if (!OLDMEM_BASE && MACHINE_IS_VM) + if (!oldmem_data.start && MACHINE_IS_VM) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -726,11 +722,11 @@ static void __init reserve_crashkernel(void) static void __init reserve_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - if (!INITRD_START || !INITRD_SIZE) + if (!initrd_data.start || !initrd_data.size) return; - initrd_start = INITRD_START; - initrd_end = initrd_start + INITRD_SIZE; - memblock_reserve(INITRD_START, INITRD_SIZE); + initrd_start = (unsigned long)__va(initrd_data.start); + initrd_end = initrd_start + initrd_data.size; + memblock_reserve(initrd_data.start, initrd_data.size); #endif } @@ -758,7 +754,7 @@ static void __init free_mem_detect_info(void) get_mem_detect_reserved(&start, &size); if (size) - memblock_free(start, size); + memblock_phys_free(start, size); } static const char * __init get_mem_info_source(void) @@ -781,8 +777,8 @@ static void __init memblock_add_mem_detect_info(void) unsigned long start, end; int i; - memblock_dbg("physmem info source: %s (%hhd)\n", - get_mem_info_source(), mem_detect.info_source); + pr_debug("physmem info source: %s (%hhd)\n", + get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); for_each_mem_detect_block(i, &start, &end) { @@ -790,7 +786,7 @@ static void __init memblock_add_mem_detect_info(void) memblock_physmem_add(start, end - start); } memblock_set_bottom_up(false); - memblock_dump_all(); + memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } /* @@ -799,10 +795,10 @@ static void __init memblock_add_mem_detect_info(void) static void __init check_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD - if (INITRD_START && INITRD_SIZE && - !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) { + if (initrd_data.start && initrd_data.size && + !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_free(INITRD_START, INITRD_SIZE); + memblock_phys_free(initrd_data.start, initrd_data.size); initrd_start = initrd_end = 0; } #endif @@ -813,176 +809,68 @@ static void __init check_initrd(void) */ static void __init reserve_kernel(void) { - unsigned long start_pfn = PFN_UP(__pa(_end)); - - memblock_reserve(0, HEAD_END); - memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - - (unsigned long)_stext); - memblock_reserve(__sdma, __edma - __sdma); + memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); + memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); + memblock_reserve(__amode31_base, __eamode31 - __samode31); + memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); + memblock_reserve(__pa(_stext), _end - _stext); } static void __init setup_memory(void) { - struct memblock_region *reg; + phys_addr_t start, end; + u64 i; /* * Init storage key for present memory */ - for_each_memblock(memory, reg) { - storage_key_init_range(reg->base, reg->base + reg->size); - } - psw_set_key(PAGE_DEFAULT_KEY); + for_each_mem_range(i, &start, &end) + storage_key_init_range(start, end); - /* Only cosmetics */ - memblock_enforce_memory_limit(memblock_end_of_DRAM()); + psw_set_key(PAGE_DEFAULT_KEY); } -/* - * Setup hardware capabilities. - */ -static int __init setup_hwcaps(void) +static void __init relocate_amode31_section(void) { - static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - struct cpuid cpu_id; - int i; + unsigned long amode31_size = __eamode31 - __samode31; + long amode31_offset = __amode31_base - __samode31; + long *ptr; - /* - * The store facility list bits numbers as found in the principles - * of operation are numbered with bit 1UL<<31 as number 0 to - * bit 1UL<<0 as number 31. - * Bit 0: instructions named N3, "backported" to esa-mode - * Bit 2: z/Architecture mode is active - * Bit 7: the store-facility-list-extended facility is installed - * Bit 17: the message-security assist is installed - * Bit 19: the long-displacement facility is installed - * Bit 21: the extended-immediate facility is installed - * Bit 22: extended-translation facility 3 is installed - * Bit 30: extended-translation facility 3 enhancement facility - * These get translated to: - * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1, - * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3, - * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and - * HWCAP_S390_ETF3EH bit 8 (22 && 30). - */ - for (i = 0; i < 6; i++) - if (test_facility(stfl_bits[i])) - elf_hwcap |= 1UL << i; + pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); - if (test_facility(22) && test_facility(30)) - elf_hwcap |= HWCAP_S390_ETF3EH; + /* Move original AMODE31 section to the new one */ + memmove((void *)__amode31_base, (void *)__samode31, amode31_size); + /* Zero out the old AMODE31 section to catch invalid accesses within it */ + memset((void *)__samode31, 0, amode31_size); - /* - * Check for additional facilities with store-facility-list-extended. - * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0 - * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information - * as stored by stfl, bits 32-xxx contain additional facilities. - * How many facility words are stored depends on the number of - * doublewords passed to the instruction. The additional facilities - * are: - * Bit 42: decimal floating point facility is installed - * Bit 44: perform floating point operation facility is installed - * translated to: - * HWCAP_S390_DFP bit 6 (42 && 44). - */ - if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) - elf_hwcap |= HWCAP_S390_DFP; - - /* - * Huge page support HWCAP_S390_HPAGE is bit 7. - */ - if (MACHINE_HAS_EDAT1) - elf_hwcap |= HWCAP_S390_HPAGE; - - /* - * 64-bit register support for 31-bit processes - * HWCAP_S390_HIGH_GPRS is bit 9. - */ - elf_hwcap |= HWCAP_S390_HIGH_GPRS; - - /* - * Transactional execution support HWCAP_S390_TE is bit 10. - */ - if (MACHINE_HAS_TE) - elf_hwcap |= HWCAP_S390_TE; - - /* - * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension - * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX - * instead of facility bit 129. - */ - if (MACHINE_HAS_VX) { - elf_hwcap |= HWCAP_S390_VXRS; - if (test_facility(134)) - elf_hwcap |= HWCAP_S390_VXRS_EXT; - if (test_facility(135)) - elf_hwcap |= HWCAP_S390_VXRS_BCD; - if (test_facility(148)) - elf_hwcap |= HWCAP_S390_VXRS_EXT2; - if (test_facility(152)) - elf_hwcap |= HWCAP_S390_VXRS_PDE; - } - if (test_facility(150)) - elf_hwcap |= HWCAP_S390_SORT; - if (test_facility(151)) - elf_hwcap |= HWCAP_S390_DFLT; - - /* - * Guarded storage support HWCAP_S390_GS is bit 12. - */ - if (MACHINE_HAS_GS) - elf_hwcap |= HWCAP_S390_GS; - - get_cpu_id(&cpu_id); - add_device_randomness(&cpu_id, sizeof(cpu_id)); - switch (cpu_id.machine) { - case 0x2064: - case 0x2066: - default: /* Use "z900" as default for 64 bit kernels. */ - strcpy(elf_platform, "z900"); - break; - case 0x2084: - case 0x2086: - strcpy(elf_platform, "z990"); - break; - case 0x2094: - case 0x2096: - strcpy(elf_platform, "z9-109"); - break; - case 0x2097: - case 0x2098: - strcpy(elf_platform, "z10"); - break; - case 0x2817: - case 0x2818: - strcpy(elf_platform, "z196"); - break; - case 0x2827: - case 0x2828: - strcpy(elf_platform, "zEC12"); - break; - case 0x2964: - case 0x2965: - strcpy(elf_platform, "z13"); - break; - case 0x3906: - case 0x3907: - strcpy(elf_platform, "z14"); - break; - case 0x8561: - case 0x8562: - strcpy(elf_platform, "z15"); - break; - } - - /* - * Virtualization support HWCAP_INT_SIE is bit 0. - */ - if (sclp.has_sief2) - int_hwcap |= HWCAP_INT_SIE; + /* Update all AMODE31 region references */ + for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++) + *ptr += amode31_offset; +} - return 0; +/* This must be called after AMODE31 relocation */ +static void __init setup_cr(void) +{ + union ctlreg2 cr2; + union ctlreg5 cr5; + union ctlreg15 cr15; + + __ctl_duct[1] = (unsigned long)__ctl_aste; + __ctl_duct[2] = (unsigned long)__ctl_aste; + __ctl_duct[4] = (unsigned long)__ctl_duald; + + /* Update control registers CR2, CR5 and CR15 */ + __ctl_store(cr2.val, 2, 2); + __ctl_store(cr5.val, 5, 5); + __ctl_store(cr15.val, 15, 15); + cr2.ducto = (unsigned long)__ctl_duct >> 6; + cr5.pasteo = (unsigned long)__ctl_duct >> 6; + cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; + __ctl_load(cr2.val, 2, 2); + __ctl_load(cr5.val, 5, 5); + __ctl_load(cr15.val, 15, 15); } -arch_initcall(setup_hwcaps); /* * Add system information as device randomness @@ -991,14 +879,15 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE, - PAGE_SIZE); + vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!vmms) panic("Failed to allocate memory for sysinfo structure\n"); - if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); - memblock_free((unsigned long) vmms, PAGE_SIZE); + memblock_free(vmms, PAGE_SIZE); + + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); } /* @@ -1025,8 +914,7 @@ static void __init setup_control_program_code(void) { union diag318_info diag318_info = { .cpnc = CPNC_LINUX, - .cpvc_linux = 0, - .cpvc_distro = {0}, + .cpvc = 0, }; if (!sclp.has_diag318) @@ -1096,14 +984,12 @@ void __init setup_arch(char **cmdline_p) ROOT_DEV = Root_RAM0; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = (unsigned long) _end; + setup_initial_init_mm(_text, _etext, _edata, _end); if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ @@ -1116,41 +1002,37 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ - reserve_memory_end(); - reserve_oldmem(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); reserve_mem_detect_info(); + memblock_set_current_limit(ident_map_size); memblock_allow_resize(); /* Get information about *all* installed memory */ memblock_add_mem_detect_info(); free_mem_detect_info(); - remove_oldmem(); - - /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. - * - * Is this still required? - */ - memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); - setup_memory_end(); + memblock_dump_all(); setup_memory(); - dma_contiguous_reserve(memory_end); + + relocate_amode31_section(); + setup_cr(); + setup_uv(); + dma_contiguous_reserve(ident_map_size); vmcp_cma_reserve(); + if (MACHINE_HAS_EDAT2) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); check_initrd(); reserve_crashkernel(); #ifdef CONFIG_CRASH_DUMP /* - * Be aware that smp_save_dump_cpus() triggers a system reset. + * Be aware that smp_save_dump_secondary_cpus() triggers a system reset. * Therefore CPU and device initialization should be done afterwards. */ - smp_save_dump_cpus(); + smp_save_dump_secondary_cpus(); #endif setup_resources(); @@ -1162,16 +1044,22 @@ void __init setup_arch(char **cmdline_p) smp_detect_cpus(); topology_init_early(); + if (test_facility(193)) + static_branch_enable(&cpu_has_bear); + /* * Create kernel page tables and switch to virtual addressing. */ paging_init(); - + memcpy_real_init(); /* * After paging_init created the kernel page table, the new PSWs * in lowcore can now run with DAT enabled. */ setup_lowcore_dat_on(); +#ifdef CONFIG_CRASH_DUMP + smp_save_dump_ipl_cpu(); +#endif /* Setup default console */ conmode_default(); @@ -1181,7 +1069,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_init_branches(); - /* Setup zfcpdump support */ + /* Setup zfcp/nvme dump support */ setup_zfcpdump(); /* Add system specific data to the random pool */ |