aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/kernel
diff options
context:
space:
mode:
authorAlexander Gordeev <agordeev@linux.ibm.com>2022-07-20 08:22:01 +0200
committerAlexander Gordeev <agordeev@linux.ibm.com>2022-07-28 18:05:23 +0200
commit7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8 (patch)
tree5a76310275502a083a2863fc5903ab6a5dd915b5 /arch/s390/kernel
parents390/setup: rearrange absolute lowcore initialization (diff)
downloadlinux-dev-7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8.tar.xz
linux-dev-7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8.zip
s390/smp: rework absolute lowcore access
Temporary unsetting of the prefix page in memcpy_absolute() routine poses a risk of executing code path with unexpectedly disabled prefix page. This rework avoids the prefix page uninstalling and disabling of normal and machine check interrupts when accessing the absolute zero memory. Although memcpy_absolute() routine can access the whole memory, it is only used to update the absolute zero lowcore. This rework therefore introduces a new mechanism for the absolute zero lowcore access and scraps memcpy_absolute() routine for good. Instead, an area is reserved in the virtual memory that is used for the absolute lowcore access only. That area holds an array of 8KB virtual mappings - one per CPU. Whenever a CPU is brought online, the corresponding item is mapped to the real address of the previously installed prefix page. The absolute zero lowcore access works like this: a CPU calls the new primitive get_abs_lowcore() to obtain its 8KB mapping as a pointer to the struct lowcore. Virtual address references to that pointer get translated to the real addresses of the prefix page, which in turn gets swapped with the absolute zero memory addresses due to prefixing. Once the pointer is not needed it must be released with put_abs_lowcore() primitive: struct lowcore *abs_lc; unsigned long flags; abs_lc = get_abs_lowcore(&flags); abs_lc->... = ...; put_abs_lowcore(abs_lc, flags); To ensure the described mechanism works large segment- and region- table entries must be avoided for the 8KB mappings. Failure to do so results in usage of Region-Frame Absolute Address (RFAA) or Segment-Frame Absolute Address (SFAA) large page fields. In that case absolute addresses would be used to address the prefix page instead of the real ones and the prefixing would get bypassed. Reviewed-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/abs_lowcore.c88
-rw-r--r--arch/s390/kernel/ipl.c9
-rw-r--r--arch/s390/kernel/machine_kexec.c8
-rw-r--r--arch/s390/kernel/os_info.c9
-rw-r--r--arch/s390/kernel/setup.c34
-rw-r--r--arch/s390/kernel/smp.c34
7 files changed, 155 insertions, 29 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3cbfa9fddd9a..45e4b2f41e05 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
extra-y += head64.o vmlinux.lds
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..dc9f0ecd4695
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/pgtable.h>
+#include <asm/abs_lowcore.h>
+
+#define ABS_LOWCORE_UNMAPPED 1
+#define ABS_LOWCORE_LAP_ON 2
+#define ABS_LOWCORE_IRQS_ON 4
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+bool __ro_after_init abs_lowcore_mapped;
+
+int abs_lowcore_map(int cpu, struct lowcore *lc)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ unsigned long phys = __pa(lc);
+ int rc, i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ rc = vmem_map_4k_page(addr, phys, PAGE_KERNEL);
+ if (rc) {
+ for (--i; i >= 0; i--) {
+ addr -= PAGE_SIZE;
+ vmem_unmap_4k_page(addr);
+ }
+ return rc;
+ }
+ addr += PAGE_SIZE;
+ phys += PAGE_SIZE;
+ }
+ return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ int i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ vmem_unmap_4k_page(addr);
+ addr += PAGE_SIZE;
+ }
+}
+
+struct lowcore *get_abs_lowcore(unsigned long *flags)
+{
+ unsigned long irq_flags;
+ union ctlreg0 cr0;
+ int cpu;
+
+ *flags = 0;
+ cpu = get_cpu();
+ if (abs_lowcore_mapped) {
+ return ((struct lowcore *)__abs_lowcore) + cpu;
+ } else {
+ if (cpu != 0)
+ panic("Invalid unmapped absolute lowcore access\n");
+ local_irq_save(irq_flags);
+ if (!irqs_disabled_flags(irq_flags))
+ *flags |= ABS_LOWCORE_IRQS_ON;
+ __ctl_store(cr0.val, 0, 0);
+ if (cr0.lap) {
+ *flags |= ABS_LOWCORE_LAP_ON;
+ __ctl_clear_bit(0, 28);
+ }
+ *flags |= ABS_LOWCORE_UNMAPPED;
+ return lowcore_ptr[0];
+ }
+}
+
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
+{
+ if (abs_lowcore_mapped) {
+ if (flags)
+ panic("Invalid mapped absolute lowcore release\n");
+ } else {
+ if (smp_processor_id() != 0)
+ panic("Invalid mapped absolute lowcore access\n");
+ if (!(flags & ABS_LOWCORE_UNMAPPED))
+ panic("Invalid unmapped absolute lowcore release\n");
+ if (flags & ABS_LOWCORE_LAP_ON)
+ __ctl_set_bit(0, 28);
+ if (flags & ABS_LOWCORE_IRQS_ON)
+ local_irq_enable();
+ }
+ put_cpu();
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1cc85b8ff42e..325cbf69ebbd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -29,6 +29,7 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
@@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
+ struct lowcore *abs_lc;
+ unsigned long flags;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- put_abs_lowcore(ipib, ipib);
- put_abs_lowcore(ipib_checksum, csum);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->ipib = ipib;
+ abs_lc->ipib_checksum = csum;
+ put_abs_lowcore(abs_lc, flags);
dump_run(trigger);
}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ab761c008f98..4579b42286d5 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
@@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ struct lowcore *abs_lc;
+ unsigned long flags;
+
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+ put_abs_lowcore(abs_lc, flags);
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 1acc2e05d70f..506ccb74d2d0 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/asm-offsets.h>
@@ -57,13 +57,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
- void *ptr = &os_info;
+ struct lowcore *abs_lc;
+ unsigned long flags;
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
- put_abs_lowcore(os_info, __pa(ptr));
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->os_info = __pa(&os_info);
+ put_abs_lowcore(abs_lc, flags);
}
#ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 8f483132901e..91139a16a44f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -58,7 +58,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
@@ -411,8 +411,9 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
unsigned long int_psw_mask = PSW_KERNEL_BITS;
+ struct lowcore *abs_lc, *lc;
unsigned long mcck_stack;
- struct lowcore *lc;
+ unsigned long flags;
if (IS_ENABLED(CONFIG_KASAN))
int_psw_mask |= PSW_MASK_DAT;
@@ -474,11 +475,13 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;
- put_abs_lowcore(restart_stack, lc->restart_stack);
- put_abs_lowcore(restart_fn, lc->restart_fn);
- put_abs_lowcore(restart_data, lc->restart_data);
- put_abs_lowcore(restart_source, lc->restart_source);
- put_abs_lowcore(restart_psw, lc->restart_psw);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ put_abs_lowcore(abs_lc, flags);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
@@ -499,8 +502,8 @@ static void __init setup_lowcore_dat_off(void)
static void __init setup_lowcore_dat_on(void)
{
- struct lowcore *lc = lowcore_ptr[0];
- int cr;
+ struct lowcore *abs_lc;
+ unsigned long flags;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@@ -509,10 +512,15 @@ static void __init setup_lowcore_dat_on(void)
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
__ctl_set_bit(0, 28);
__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
- put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
- put_abs_lowcore(program_new_psw, lc->program_new_psw);
- for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
- put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ abs_lc->program_new_psw = S390_lowcore.program_new_psw;
+ memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
+ sizeof(abs_lc->cregs_save_area));
+ put_abs_lowcore(abs_lc, flags);
+ if (abs_lowcore_map(0, lowcore_ptr[0]))
+ panic("Couldn't setup absolute lowcore");
+ abs_lowcore_mapped = true;
}
static struct resource code_resource = {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 30c91d565933..40876d809ea6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -45,7 +45,7 @@
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
@@ -212,10 +212,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
+ if (abs_lowcore_map(cpu, lc))
+ goto out_mcesa;
lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
+out_mcesa:
+ nmi_free_mcesa(&lc->mcesad);
out:
stack_free(mcck_stack);
stack_free(async_stack);
@@ -237,6 +241,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[cpu] = NULL;
+ abs_lowcore_unmap(cpu);
nmi_free_mcesa(&lc->mcesad);
stack_free(async_stack);
stack_free(mcck_stack);
@@ -315,9 +320,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
- struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned int source_cpu = stap();
+ struct lowcore *lc, *abs_lc;
+ unsigned int source_cpu;
+ unsigned long flags;
+ lc = lowcore_ptr[pcpu - pcpu_devices];
+ source_cpu = stap();
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
@@ -332,10 +340,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
- put_abs_lowcore(restart_stack, stack);
- put_abs_lowcore(restart_fn, (unsigned long)func);
- put_abs_lowcore(restart_data, (unsigned long)data);
- put_abs_lowcore(restart_source, source_cpu);
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = stack;
+ abs_lc->restart_fn = (unsigned long)func;
+ abs_lc->restart_data = (unsigned long)data;
+ abs_lc->restart_source = source_cpu;
+ put_abs_lowcore(abs_lc, flags);
}
__bpon();
asm volatile(
@@ -581,6 +591,8 @@ static DEFINE_SPINLOCK(ctl_lock);
void smp_ctl_set_clear_bit(int cr, int bit, bool set)
{
struct ec_creg_mask_parms parms = { .cr = cr, };
+ struct lowcore *abs_lc;
+ unsigned long flags;
u64 ctlreg;
if (set) {
@@ -591,9 +603,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
parms.andval = ~(1UL << bit);
}
spin_lock(&ctl_lock);
- get_abs_lowcore(ctlreg, cregs_save_area[cr]);
+ abs_lc = get_abs_lowcore(&flags);
+ ctlreg = abs_lc->cregs_save_area[cr];
ctlreg = (ctlreg & parms.andval) | parms.orval;
- put_abs_lowcore(cregs_save_area[cr], ctlreg);
+ abs_lc->cregs_save_area[cr] = ctlreg;
+ put_abs_lowcore(abs_lc, flags);
spin_unlock(&ctl_lock);
on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
@@ -1281,6 +1295,8 @@ static int __init smp_reinit_ipl_cpu(void)
__ctl_clear_bit(0, 28); /* disable lowcore protection */
S390_lowcore.mcesad = mcesad;
__ctl_load(cr0, 0, 0);
+ if (abs_lowcore_map(0, lc))
+ panic("Couldn't remap absolute lowcore");
lowcore_ptr[0] = lc;
local_mcck_enable();
local_irq_restore(flags);