Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--  arch/s390/kernel/Makefile | 30
-rw-r--r--  arch/s390/kernel/abs_lowcore.c | 47
-rw-r--r--  arch/s390/kernel/alternative.c | 125
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 37
-rw-r--r--  arch/s390/kernel/cache.c | 3
-rw-r--r--  arch/s390/kernel/cert_store.c | 813
-rw-r--r--  arch/s390/kernel/compat_signal.c | 38
-rw-r--r--  arch/s390/kernel/cpacf.c | 119
-rw-r--r--  arch/s390/kernel/cpcmd.c | 12
-rw-r--r--  arch/s390/kernel/cpufeature.c | 51
-rw-r--r--  arch/s390/kernel/crash_dump.c | 298
-rw-r--r--  arch/s390/kernel/ctlreg.c | 121
-rw-r--r--  arch/s390/kernel/debug.c | 361
-rw-r--r--  arch/s390/kernel/diag/Makefile | 1
-rw-r--r--  arch/s390/kernel/diag/diag.c (renamed from arch/s390/kernel/diag.c) | 122
-rw-r--r--  arch/s390/kernel/diag/diag310.c | 276
-rw-r--r--  arch/s390/kernel/diag/diag324.c | 224
-rw-r--r--  arch/s390/kernel/diag/diag_ioctl.h | 14
-rw-r--r--  arch/s390/kernel/diag/diag_misc.c | 63
-rw-r--r--  arch/s390/kernel/dis.c | 24
-rw-r--r--  arch/s390/kernel/dumpstack.c | 54
-rw-r--r--  arch/s390/kernel/early.c | 220
-rw-r--r--  arch/s390/kernel/early_printk.c | 16
-rw-r--r--  arch/s390/kernel/earlypgm.S | 23
-rw-r--r--  arch/s390/kernel/ebcdic.c | 2
-rw-r--r--  arch/s390/kernel/entry.S | 610
-rw-r--r--  arch/s390/kernel/entry.h | 7
-rw-r--r--  arch/s390/kernel/facility.c | 21
-rw-r--r--  arch/s390/kernel/fpu.c | 378
-rw-r--r--  arch/s390/kernel/ftrace.c | 172
-rw-r--r--  arch/s390/kernel/ftrace.h | 2
-rw-r--r--  arch/s390/kernel/guarded_storage.c | 9
-rw-r--r--  arch/s390/kernel/head64.S | 23
-rw-r--r--  arch/s390/kernel/hiperdispatch.c | 431
-rw-r--r--  arch/s390/kernel/idle.c | 111
-rw-r--r--  arch/s390/kernel/ipl.c | 881
-rw-r--r--  arch/s390/kernel/irq.c | 55
-rw-r--r--  arch/s390/kernel/kexec_elf.c | 2
-rw-r--r--  arch/s390/kernel/kexec_image.c | 2
-rw-r--r--  arch/s390/kernel/kprobes.c | 151
-rw-r--r--  arch/s390/kernel/kprobes_insn_page.S | 22
-rw-r--r--  arch/s390/kernel/lgr.c | 2
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 92
-rw-r--r--  arch/s390/kernel/machine_kexec_file.c | 19
-rw-r--r--  arch/s390/kernel/mcount.S | 116
-rw-r--r--  arch/s390/kernel/module.c | 52
-rw-r--r--  arch/s390/kernel/nmi.c | 398
-rw-r--r--  arch/s390/kernel/nospec-branch.c | 22
-rw-r--r--  arch/s390/kernel/nospec-sysfs.c | 12
-rw-r--r--  arch/s390/kernel/numa.c | 11
-rw-r--r--  arch/s390/kernel/os_info.c | 47
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 851
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_common.c | 233
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_events.c | 182
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 664
-rw-r--r--  arch/s390/kernel/perf_event.c | 15
-rw-r--r--  arch/s390/kernel/perf_pai_crypto.c | 436
-rw-r--r--  arch/s390/kernel/perf_pai_ext.c | 757
-rw-r--r--  arch/s390/kernel/perf_regs.c | 8
-rw-r--r--  arch/s390/kernel/process.c | 80
-rw-r--r--  arch/s390/kernel/processor.c | 83
-rw-r--r--  arch/s390/kernel/ptrace.c | 227
-rw-r--r--  arch/s390/kernel/reipl.S | 36
-rw-r--r--  arch/s390/kernel/relocate_kernel.S | 96
-rw-r--r--  arch/s390/kernel/rethook.c | 34
-rw-r--r--  arch/s390/kernel/rethook.h | 7
-rw-r--r--  arch/s390/kernel/setup.c | 426
-rw-r--r--  arch/s390/kernel/signal.c | 35
-rw-r--r--  arch/s390/kernel/smp.c | 526
-rw-r--r--  arch/s390/kernel/stacktrace.c | 111
-rw-r--r--  arch/s390/kernel/sthyi.c | 109
-rw-r--r--  arch/s390/kernel/syscall.c | 78
-rw-r--r--  arch/s390/kernel/syscalls/Makefile | 31
-rw-r--r--  arch/s390/kernel/syscalls/syscall.tbl | 23
-rw-r--r--  arch/s390/kernel/sysinfo.c | 88
-rw-r--r--  arch/s390/kernel/text_amode31.S | 89
-rw-r--r--  arch/s390/kernel/time.c | 98
-rw-r--r--  arch/s390/kernel/topology.c | 213
-rw-r--r--  arch/s390/kernel/traps.c | 188
-rw-r--r--  arch/s390/kernel/unwind_bc.c | 4
-rw-r--r--  arch/s390/kernel/uprobes.c | 1
-rw-r--r--  arch/s390/kernel/uv.c | 814
-rw-r--r--  arch/s390/kernel/vdso.c | 162
-rw-r--r--  arch/s390/kernel/vdso32/Makefile | 39
-rw-r--r--  arch/s390/kernel/vdso32/vdso32.lds.S | 10
-rw-r--r--  arch/s390/kernel/vdso32/vdso_user_wrapper.S | 3
-rw-r--r--  arch/s390/kernel/vdso64/Makefile | 53
-rw-r--r--  arch/s390/kernel/vdso64/vdso.h | 1
-rw-r--r--  arch/s390/kernel/vdso64/vdso64.lds.S | 16
-rw-r--r--  arch/s390/kernel/vdso64/vdso_user_wrapper.S | 34
-rw-r--r--  arch/s390/kernel/vdso64/vgetrandom-chacha.S | 181
-rw-r--r--  arch/s390/kernel/vdso64/vgetrandom.c | 14
-rw-r--r--  arch/s390/kernel/vmcore_info.c | 24
-rw-r--r--  arch/s390/kernel/vmlinux.lds.S | 79
-rw-r--r--  arch/s390/kernel/vtime.c | 92
-rw-r--r--  arch/s390/kernel/wti.c | 215
96 files changed, 9194 insertions, 5014 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 27d6b3c7aa06..ea5ed6654050 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -10,6 +10,9 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_stacktrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind_bc.o = $(CC_FLAGS_FTRACE)
endif
@@ -33,22 +36,24 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
-obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
+obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o
+obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o
+obj-y += diag/
-extra-y += head64.o vmlinux.lds
+always-$(KBUILD_BUILTIN) += vmlinux.lds
obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
+obj-$(CONFIG_SYSFS) += cpacf.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
+obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o hiperdispatch.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
@@ -56,26 +61,27 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
obj-$(CONFIG_COMPAT) += $(compat-obj-y)
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
obj-$(CONFIG_KPROBES) += mcount.o
+obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
-
+obj-$(CONFIG_CERT_STORE) += cert_store.o
obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
-obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o
+obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..6252b7d115dd
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ unsigned long phys = __pa(lc);
+ int rc, i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+ if (rc) {
+ /*
+ * Do not unmap allocated page tables in case the
+ * allocation was not requested. In such a case the
+ * request is expected to come from an atomic context,
+ * while the unmap attempt might sleep.
+ */
+ if (alloc) {
+ for (--i; i >= 0; i--) {
+ addr -= PAGE_SIZE;
+ vmem_unmap_4k_page(addr);
+ }
+ }
+ return rc;
+ }
+ addr += PAGE_SIZE;
+ phys += PAGE_SIZE;
+ }
+ return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+ unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+ int i;
+
+ for (i = 0; i < LC_PAGES; i++) {
+ vmem_unmap_4k_page(addr);
+ addr += PAGE_SIZE;
+ }
+}
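
For illustration only (not part of this patch): a minimal sketch of how a caller might pair abs_lowcore_map() and abs_lowcore_unmap() when setting up a CPU's absolute lowcore mapping. The wrapper names and calling context below are assumptions.

/*
 * Illustrative caller (hypothetical): map the absolute lowcore for one
 * CPU from a sleepable context, so page table allocation is allowed.
 */
static int example_setup_abs_lowcore(int cpu, struct lowcore *lc)
{
	int rc;

	rc = abs_lowcore_map(cpu, lc, true);	/* alloc == true: may allocate page tables */
	if (rc)
		return rc;
	/* The mapping now lives at __abs_lowcore + cpu * sizeof(struct lowcore). */
	return 0;
}

static void example_teardown_abs_lowcore(int cpu)
{
	abs_lowcore_unmap(cpu);			/* unmaps all LC_PAGES pages of this CPU */
}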
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index e7bca29f9c34..90c0e6408992 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,75 +1,90 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/text-patching.h>
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) "alt: " fmt
+#endif
+
+#include <linux/uaccess.h>
+#include <linux/printk.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
-#include <asm/nospec-branch.h>
+#include <asm/sections.h>
+#include <asm/machine.h>
+
+#ifndef a_debug
+#define a_debug pr_debug
+#endif
+
+#ifndef __kernel_va
+#define __kernel_va(x) (void *)(x)
+#endif
+
+unsigned long __bootdata_preserved(machine_features[1]);
+
+struct alt_debug {
+ unsigned long facilities[MAX_FACILITY_BIT / BITS_PER_LONG];
+ unsigned long mfeatures[MAX_MFEATURE_BIT / BITS_PER_LONG];
+ int spec;
+};
-static int __initdata_or_module alt_instr_disabled;
+static struct alt_debug __bootdata_preserved(alt_debug);
-static int __init disable_alternative_instructions(char *str)
+static void alternative_dump(u8 *old, u8 *new, unsigned int len, unsigned int type, unsigned int data)
{
- alt_instr_disabled = 1;
- return 0;
-}
+ char oinsn[33], ninsn[33];
+ unsigned long kptr;
+ unsigned int pos;
-early_param("noaltinstr", disable_alternative_instructions);
+ for (pos = 0; pos < len && 2 * pos < sizeof(oinsn) - 3; pos++)
+ hex_byte_pack(&oinsn[2 * pos], old[pos]);
+ oinsn[2 * pos] = 0;
+ for (pos = 0; pos < len && 2 * pos < sizeof(ninsn) - 3; pos++)
+ hex_byte_pack(&ninsn[2 * pos], new[pos]);
+ ninsn[2 * pos] = 0;
+ kptr = (unsigned long)__kernel_va(old);
+ a_debug("[%d/%3d] %016lx: %s -> %s\n", type, data, kptr, oinsn, ninsn);
+}
-static void __init_or_module __apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
+ struct alt_debug *d;
struct alt_instr *a;
- u8 *instr, *replacement;
+ bool debug, replace;
+ u8 *old, *new;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
+ d = &alt_debug;
for (a = start; a < end; a++) {
- instr = (u8 *)&a->instr_offset + a->instr_offset;
- replacement = (u8 *)&a->repl_offset + a->repl_offset;
-
- if (!__test_facility(a->facility, alt_stfle_fac_list))
- continue;
-
- if (unlikely(a->instrlen % 2)) {
- WARN_ONCE(1, "cpu alternatives instructions length is "
- "odd, skipping patching\n");
+ if (!(a->ctx & ctx))
continue;
+ switch (a->type) {
+ case ALT_TYPE_FACILITY:
+ replace = test_facility(a->data);
+ debug = __test_facility(a->data, d->facilities);
+ break;
+ case ALT_TYPE_FEATURE:
+ replace = test_machine_feature(a->data);
+ debug = __test_machine_feature(a->data, d->mfeatures);
+ break;
+ case ALT_TYPE_SPEC:
+ replace = nobp_enabled();
+ debug = d->spec;
+ break;
+ default:
+ replace = false;
+ debug = false;
}
-
- s390_kernel_write(instr, replacement, a->instrlen);
+ if (!replace)
+ continue;
+ old = (u8 *)&a->instr_offset + a->instr_offset;
+ new = (u8 *)&a->repl_offset + a->repl_offset;
+ if (debug)
+ alternative_dump(old, new, a->instrlen, a->type, a->data);
+ s390_kernel_write(old, new, a->instrlen);
}
}
-
-void __init_or_module apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
-{
- if (!alt_instr_disabled)
- __apply_alternatives(start, end);
-}
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-void __init apply_alternative_instructions(void)
-{
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static void do_sync_core(void *info)
-{
- sync_core();
-}
-
-void text_poke_sync(void)
-{
- on_each_cpu(do_sync_core, NULL, 1);
-}
-
-void text_poke_sync_lock(void)
-{
- cpus_read_lock();
- text_poke_sync();
- cpus_read_unlock();
-}
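
For illustration only (not part of this patch): the original and replacement code addresses in an alt_instr entry are stored as offsets relative to their own fields. A minimal sketch of that pointer arithmetic, using a simplified struct layout assumed here:

/* Simplified stand-in for struct alt_instr; field names match the diff above. */
struct alt_entry_example {
	s32 instr_offset;	/* original code, relative to this field */
	s32 repl_offset;	/* replacement code, relative to this field */
	u8 instrlen;
};

static void resolve_example(struct alt_entry_example *a)
{
	/* Same arithmetic as __apply_alternatives(): */
	u8 *old = (u8 *)&a->instr_offset + a->instr_offset;
	u8 *new = (u8 *)&a->repl_offset + a->repl_offset;

	/*
	 * old and new now point at the patch site and its replacement;
	 * the real code copies a->instrlen bytes via s390_kernel_write().
	 */
	(void)old;
	(void)new;
}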
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index d8ce965c0a97..95ecad9c7d7d 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -5,16 +5,14 @@
* and format the required data.
*/
-#define ASM_OFFSETS_C
-
#include <linux/kbuild.h>
-#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <linux/purgatory.h>
#include <linux/pgtable.h>
-#include <asm/idle.h>
-#include <asm/gmap.h>
+#include <linux/ftrace_regs.h>
+#include <asm/kvm_host_types.h>
#include <asm/stacktrace.h>
+#include <asm/ptrace.h>
int main(void)
{
@@ -28,6 +26,7 @@ int main(void)
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
+ OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
@@ -49,8 +48,8 @@ int main(void)
OFFSET(__PT_R14, pt_regs, gprs[14]);
OFFSET(__PT_R15, pt_regs, gprs[15]);
OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+ OFFSET(__PT_INT_CODE, pt_regs, int_code);
OFFSET(__PT_FLAGS, pt_regs, flags);
- OFFSET(__PT_CR1, pt_regs, cr1);
OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
@@ -62,19 +61,22 @@ int main(void)
OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
+ OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+ OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
- /* idle data offsets */
- OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
- OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
- OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter);
+ OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
+ DEFINE(STACK_FRAME_USER_OVERHEAD, sizeof(struct stack_frame_user));
+ OFFSET(__SFVDSO_RETURN_ADDRESS, stack_frame_vdso_wrapper, return_address);
+ DEFINE(STACK_FRAME_VDSO_OVERHEAD, sizeof(struct stack_frame_vdso_wrapper));
BLANK();
/* hardware defined lowcore locations 0x000 - 0x1ff */
OFFSET(__LC_EXT_PARAMS, lowcore, ext_params);
OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr);
OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code);
OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc);
- OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_CODE, lowcore, pgm_code);
+ OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_int_code);
OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code);
OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num);
OFFSET(__LC_PER_CODE, lowcore, per_code);
@@ -109,10 +111,9 @@ int main(void)
OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw);
OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw);
/* software defined lowcore locations 0x200 - 0xdff*/
- OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
- OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
+ OFFSET(__LC_SAVE_AREA, lowcore, save_area);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
- OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@@ -121,8 +122,6 @@ int main(void)
OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer);
OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock);
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
- OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
- OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
@@ -137,7 +136,6 @@ int main(void)
OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_LPP, lowcore, lpp);
OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
- OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_LAST_BREAK, lowcore, last_break);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
@@ -160,7 +158,6 @@ int main(void)
OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
BLANK();
/* gmap/sie offsets */
- OFFSET(__GMAP_ASCE, gmap, asce);
OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
/* kexec_sha_region */
@@ -177,5 +174,9 @@ int main(void)
DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+ OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
+ DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
+
+ OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}
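
Background note, not part of the patch: asm-offsets.c is never linked into the kernel. Kbuild compiles it to assembly and a script converts the emitted markers into include/generated/asm-offsets.h for use by entry.S and other assembly files. The helpers look roughly like this (closely following include/linux/kbuild.h):

/* Roughly how the kbuild helpers used above are defined. */
#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))

#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))

/*
 * Example: OFFSET(__PT_PSW, pt_regs, psw) emits a "->__PT_PSW <value>"
 * marker into the generated assembly, which the build then turns into
 * "#define __PT_PSW <offset>" in asm-offsets.h.
 */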
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 7ee3651d00ab..4f2669030220 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -46,7 +46,7 @@ struct cache_info {
#define CACHE_MAX_LEVEL 8
union cache_topology {
struct cache_info ci[CACHE_MAX_LEVEL];
- unsigned long long raw;
+ unsigned long raw;
};
static const char * const cache_type_string[] = {
@@ -166,5 +166,6 @@ int populate_cache_leaves(unsigned int cpu)
ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
}
}
+ this_cpu_ci->cpu_map_populated = true;
return 0;
}
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
new file mode 100644
index 000000000000..c217a5e64094
--- /dev/null
+++ b/arch/s390/kernel/cert_store.c
@@ -0,0 +1,813 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DIAG 0x320 support and certificate store handling
+ *
+ * Copyright IBM Corp. 2023
+ * Author(s): Anastasia Eskova <anastasia.eskova@ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key-type.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+#include <crypto/sha2.h>
+#include <keys/user-type.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+
+#define DIAG_MAX_RETRIES 10
+
+#define VCE_FLAGS_VALID_MASK 0x80
+
+#define ISM_LEN_DWORDS 4
+#define VCSSB_LEN_BYTES 128
+#define VCSSB_LEN_NO_CERTS 4
+#define VCB_LEN_NO_CERTS 64
+#define VC_NAME_LEN_BYTES 64
+
+#define CERT_STORE_KEY_TYPE_NAME "cert_store_key"
+#define CERT_STORE_KEYRING_NAME "cert_store"
+
+static debug_info_t *cert_store_dbf;
+static debug_info_t *cert_store_hexdump;
+
+#define pr_dbf_msg(fmt, ...) \
+ debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__)
+
+enum diag320_subcode {
+ DIAG320_SUBCODES = 0,
+ DIAG320_STORAGE = 1,
+ DIAG320_CERT_BLOCK = 2,
+};
+
+enum diag320_rc {
+ DIAG320_RC_OK = 0x0001,
+ DIAG320_RC_CS_NOMATCH = 0x0306,
+};
+
+/* Verification Certificates Store Support Block (VCSSB). */
+struct vcssb {
+ u32 vcssb_length;
+ u8 pad_0x04[3];
+ u8 version;
+ u8 pad_0x08[8];
+ u32 cs_token;
+ u8 pad_0x14[12];
+ u16 total_vc_index_count;
+ u16 max_vc_index_count;
+ u8 pad_0x24[28];
+ u32 max_vce_length;
+ u32 max_vcxe_length;
+ u8 pad_0x48[8];
+ u32 max_single_vcb_length;
+ u32 total_vcb_length;
+ u32 max_single_vcxb_length;
+ u32 total_vcxb_length;
+ u8 pad_0x60[32];
+} __packed __aligned(8);
+
+/* Verification Certificate Entry (VCE) Header. */
+struct vce_header {
+ u32 vce_length;
+ u8 flags;
+ u8 key_type;
+ u16 vc_index;
+ u8 vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */
+ u8 vc_format;
+ u8 pad_0x49;
+ u16 key_id_length;
+ u8 pad_0x4c;
+ u8 vc_hash_type;
+ u16 vc_hash_length;
+ u8 pad_0x50[4];
+ u32 vc_length;
+ u8 pad_0x58[8];
+ u16 vc_hash_offset;
+ u16 vc_offset;
+ u8 pad_0x64[28];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB) Header. */
+struct vcb_header {
+ u32 vcb_input_length;
+ u8 pad_0x04[4];
+ u16 first_vc_index;
+ u16 last_vc_index;
+ u32 pad_0x0c;
+ u32 cs_token;
+ u8 pad_0x14[12];
+ u32 vcb_output_length;
+ u8 pad_0x24[3];
+ u8 version;
+ u16 stored_vc_count;
+ u16 remaining_vc_count;
+ u8 pad_0x2c[20];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB). */
+struct vcb {
+ struct vcb_header vcb_hdr;
+ u8 vcb_buf[];
+} __packed __aligned(4);
+
+/* Verification Certificate Entry (VCE). */
+struct vce {
+ struct vce_header vce_hdr;
+ u8 cert_data_buf[];
+} __packed __aligned(4);
+
+static void cert_store_key_describe(const struct key *key, struct seq_file *m)
+{
+ char ascii[VC_NAME_LEN_BYTES + 1];
+
+ /*
+ * First 64 bytes of the key description is key name in EBCDIC CP 500.
+ * Convert it to ASCII for displaying in /proc/keys.
+ */
+ strscpy(ascii, key->description);
+ EBCASC_500(ascii, VC_NAME_LEN_BYTES);
+ seq_puts(m, ascii);
+
+ seq_puts(m, &key->description[VC_NAME_LEN_BYTES]);
+ if (key_is_positive(key))
+ seq_printf(m, ": %u", key->datalen);
+}
+
+/*
+ * Certificate store key type takes over properties of
+ * user key but cannot be updated.
+ */
+static struct key_type key_type_cert_store_key = {
+ .name = CERT_STORE_KEY_TYPE_NAME,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .instantiate = generic_key_instantiate,
+ .revoke = user_revoke,
+ .destroy = user_destroy,
+ .describe = cert_store_key_describe,
+ .read = user_read,
+};
+
+/* Logging functions. */
+static void pr_dbf_vcb(const struct vcb *b)
+{
+ pr_dbf_msg("VCB Header:");
+ pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length);
+ pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index);
+ pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index);
+ pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token);
+ pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length);
+ pr_dbf_msg("version: %d", b->vcb_hdr.version);
+ pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count);
+ pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count);
+}
+
+static void pr_dbf_vce(const struct vce *e)
+{
+ unsigned char vc_name[VC_NAME_LEN_BYTES + 1];
+ char log_string[VC_NAME_LEN_BYTES + 40];
+
+ pr_dbf_msg("VCE Header:");
+ pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length);
+ pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags);
+ pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type);
+ pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index);
+ pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format);
+ pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length);
+ pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type);
+ pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length);
+ pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset);
+ pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length);
+ pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset);
+
+ /* Certificate name in ASCII. */
+ memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES);
+ EBCASC_500(vc_name, VC_NAME_LEN_BYTES);
+ vc_name[VC_NAME_LEN_BYTES] = '\0';
+
+ snprintf(log_string, sizeof(log_string),
+ "index: %d vce_hdr.vc_name (ASCII): %s",
+ e->vce_hdr.vc_index, vc_name);
+ debug_text_event(cert_store_hexdump, 3, log_string);
+
+ /* Certificate data. */
+ debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start");
+ debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128);
+ debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end");
+ debug_event(cert_store_hexdump, 3,
+ (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128);
+}
+
+static void pr_dbf_vcssb(const struct vcssb *s)
+{
+ debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1");
+ debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES);
+
+ pr_dbf_msg("VCSSB:");
+ pr_dbf_msg("vcssb_length: %u", s->vcssb_length);
+ pr_dbf_msg("version: %u", s->version);
+ pr_dbf_msg("cs_token: %u", s->cs_token);
+ pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count);
+ pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count);
+ pr_dbf_msg("max_vce_length: %u", s->max_vce_length);
+	pr_dbf_msg("max_vcxe_length: %u", s->max_vcxe_length);
+ pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length);
+ pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length);
+ pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length);
+ pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length);
+}
+
+static int __diag320(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, };
+
+ asm_inline volatile(
+ " diag %[rp],%[subcode],0x320\n"
+ "0: nopr %%r7\n"
+ EX_TABLE(0b, 0b)
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "cc", "memory");
+
+ return rp.odd;
+}
+
+static int diag320(unsigned long subcode, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X320);
+
+ return __diag320(subcode, addr);
+}
+
+/*
+ * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in
+ * VCE. Return -EINVAL if hashes don't match.
+ */
+static int check_certificate_hash(const struct vce *vce)
+{
+ u8 hash[SHA256_DIGEST_SIZE];
+ u16 vc_hash_length;
+ u8 *vce_hash;
+
+ vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset;
+ vc_hash_length = vce->vce_hdr.vc_hash_length;
+ sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash);
+ if (memcmp(vce_hash, hash, vc_hash_length) == 0)
+ return 0;
+
+ pr_dbf_msg("SHA256 hash of received certificate does not match");
+ debug_text_event(cert_store_hexdump, 3, "VCE hash:");
+ debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE);
+ debug_text_event(cert_store_hexdump, 3, "Calculated hash:");
+ debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE);
+
+ return -EINVAL;
+}
+
+static int check_certificate_valid(const struct vce *vce)
+{
+ if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) {
+ pr_dbf_msg("Certificate entry is invalid");
+ return -EINVAL;
+ }
+ if (vce->vce_hdr.vc_format != 1) {
+ pr_dbf_msg("Certificate format is not supported");
+ return -EINVAL;
+ }
+ if (vce->vce_hdr.vc_hash_type != 1) {
+ pr_dbf_msg("Hash type is not supported");
+ return -EINVAL;
+ }
+
+ return check_certificate_hash(vce);
+}
+
+static struct key *get_user_session_keyring(void)
+{
+ key_ref_t us_keyring_ref;
+
+ us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING,
+ KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+ if (IS_ERR(us_keyring_ref)) {
+ pr_dbf_msg("Couldn't get user session keyring: %ld",
+ PTR_ERR(us_keyring_ref));
+ return ERR_PTR(-ENOKEY);
+ }
+ key_ref_put(us_keyring_ref);
+ return key_ref_to_ptr(us_keyring_ref);
+}
+
+/* Invalidate all keys from cert_store keyring. */
+static int invalidate_keyring_keys(struct key *keyring)
+{
+ unsigned long num_keys, key_index;
+ size_t keyring_payload_len;
+ key_serial_t *key_array;
+ struct key *current_key;
+ int rc;
+
+ keyring_payload_len = key_type_keyring.read(keyring, NULL, 0);
+ num_keys = keyring_payload_len / sizeof(key_serial_t);
+ key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL);
+ if (!key_array)
+ return -ENOMEM;
+
+ rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len);
+ if (rc != keyring_payload_len) {
+ pr_dbf_msg("Couldn't read keyring payload");
+ goto out;
+ }
+
+ for (key_index = 0; key_index < num_keys; key_index++) {
+ current_key = key_lookup(key_array[key_index]);
+ pr_dbf_msg("Invalidating key %08x", current_key->serial);
+
+ key_invalidate(current_key);
+ key_put(current_key);
+ rc = key_unlink(keyring, current_key);
+ if (rc) {
+ pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc);
+ break;
+ }
+ }
+out:
+ kfree(key_array);
+ return rc;
+}
+
+static struct key *find_cs_keyring(void)
+{
+ key_ref_t cs_keyring_ref;
+ struct key *cs_keyring;
+
+ cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true),
+ &key_type_keyring, CERT_STORE_KEYRING_NAME,
+ false);
+ if (!IS_ERR(cs_keyring_ref)) {
+ cs_keyring = key_ref_to_ptr(cs_keyring_ref);
+ key_ref_put(cs_keyring_ref);
+ goto found;
+ }
+ /* Search default locations: thread, process, session keyrings */
+ cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL);
+ if (IS_ERR(cs_keyring))
+ return NULL;
+ key_put(cs_keyring);
+found:
+ return cs_keyring;
+}
+
+static void cleanup_cs_keys(void)
+{
+ struct key *cs_keyring;
+
+ cs_keyring = find_cs_keyring();
+ if (!cs_keyring)
+ return;
+
+ pr_dbf_msg("Found cert_store keyring. Purging...");
+ /*
+ * Remove cert_store_key_type in case invalidation
+ * of old cert_store keys failed (= severe error).
+ */
+ if (invalidate_keyring_keys(cs_keyring))
+ unregister_key_type(&key_type_cert_store_key);
+
+ keyring_clear(cs_keyring);
+ key_invalidate(cs_keyring);
+ key_put(cs_keyring);
+ key_unlink(get_user_session_keyring(), cs_keyring);
+}
+
+static struct key *create_cs_keyring(void)
+{
+ static struct key *cs_keyring;
+
+ /* Cleanup previous cs_keyring and all associated keys if any. */
+ cleanup_cs_keys();
+ cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID,
+ GLOBAL_ROOT_GID, current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP,
+ NULL, get_user_session_keyring());
+ if (IS_ERR(cs_keyring)) {
+ pr_dbf_msg("Can't allocate cert_store keyring");
+ return NULL;
+ }
+
+ pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial);
+
+ /*
+ * In case a previous clean-up ran into an
+ * error and unregistered key type.
+ */
+ register_key_type(&key_type_cert_store_key);
+
+ return cs_keyring;
+}
+
+/*
+ * Allocate memory and create key description in format
+ * [key name in EBCDIC]:[VCE index]:[CS token].
+ * Return a pointer to key description or NULL if memory
+ * allocation failed. Memory should be freed by caller.
+ */
+static char *get_key_description(struct vcssb *vcssb, const struct vce *vce)
+{
+ size_t len, name_len;
+ u32 cs_token;
+ char *desc;
+
+ cs_token = vcssb->cs_token;
+ /* Description string contains "%64s:%05u:%010u\0". */
+ name_len = sizeof(vce->vce_hdr.vc_name);
+ len = name_len + 1 + 5 + 1 + 10 + 1;
+ desc = kmalloc(len, GFP_KERNEL);
+ if (!desc)
+ return NULL;
+
+ memcpy(desc, vce->vce_hdr.vc_name, name_len);
+ snprintf(desc + name_len, len - name_len, ":%05u:%010u",
+ vce->vce_hdr.vc_index, cs_token);
+
+ return desc;
+}
+
+/*
+ * Create a key of type "cert_store_key" using the data from VCE for key
+ * payload and key description. Link the key to "cert_store" keyring.
+ */
+static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce,
+ struct key *keyring)
+{
+ key_ref_t newkey;
+ char *desc;
+ int rc;
+
+ desc = get_key_description(vcssb, vce);
+ if (!desc)
+ return -ENOMEM;
+
+ newkey = key_create_or_update(
+ make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME,
+ desc, (u8 *)vce + vce->vce_hdr.vc_offset,
+ vce->vce_hdr.vc_length,
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+ KEY_ALLOC_NOT_IN_QUOTA);
+
+ rc = PTR_ERR_OR_ZERO(newkey);
+ if (rc) {
+ pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc);
+ rc = -ENOKEY;
+ goto out;
+ }
+
+ key_ref_put(newkey);
+out:
+ kfree(desc);
+ return rc;
+}
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode 1. */
+static int get_vcssb(struct vcssb *vcssb)
+{
+ int diag320_rc;
+
+ memset(vcssb, 0, sizeof(*vcssb));
+ vcssb->vcssb_length = VCSSB_LEN_BYTES;
+ diag320_rc = diag320(DIAG320_STORAGE, vcssb);
+ pr_dbf_vcssb(vcssb);
+
+ if (diag320_rc != DIAG320_RC_OK) {
+ pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc);
+ return -EIO;
+ }
+ if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) {
+ pr_dbf_msg("No certificates available for current configuration");
+ return -ENOKEY;
+ }
+
+ return 0;
+}
+
+static u32 get_4k_mult_vcb_size(struct vcssb *vcssb)
+{
+ return round_up(vcssb->max_single_vcb_length, PAGE_SIZE);
+}
+
+/* Fill input fields of single-entry VCB that will be read by LPAR. */
+static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index)
+{
+ memset(vcb, 0, sizeof(*vcb));
+ vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb);
+ vcb->vcb_hdr.cs_token = vcssb->cs_token;
+
+ /* Request single entry. */
+ vcb->vcb_hdr.first_vc_index = index;
+ vcb->vcb_hdr.last_vc_index = index;
+}
+
+static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce)
+{
+ struct vce *extracted_vce;
+
+ extracted_vce = (struct vce *)vcb->vcb_buf;
+ memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length);
+ pr_dbf_vce(vce);
+}
+
+static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb)
+{
+ int rc, diag320_rc;
+
+ fill_vcb_input(vcssb, vcb, index);
+
+ diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb);
+ pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc);
+ pr_dbf_vcb(vcb);
+
+ switch (diag320_rc) {
+ case DIAG320_RC_OK:
+ rc = 0;
+ if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) {
+ pr_dbf_msg("No certificate entry for index %u", index);
+ rc = -ENOKEY;
+ } else if (vcb->vcb_hdr.remaining_vc_count != 0) {
+ /* Retry on insufficient space. */
+ pr_dbf_msg("Couldn't get all requested certificates");
+ rc = -EAGAIN;
+ }
+ break;
+ case DIAG320_RC_CS_NOMATCH:
+ pr_dbf_msg("Certificate Store token mismatch");
+ rc = -EAGAIN;
+ break;
+ default:
+ pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc);
+ rc = -EINVAL;
+ break;
+ }
+
+ return rc;
+}
+
+/*
+ * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call,
+ * extract VCE and create a key from its certificate.
+ */
+static int create_key_from_sevcb(struct vcssb *vcssb, u16 index,
+ struct key *keyring)
+{
+ struct vcb *vcb;
+ struct vce *vce;
+ int rc;
+
+ rc = -ENOMEM;
+ vcb = vmalloc(get_4k_mult_vcb_size(vcssb));
+ vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr));
+ if (!vcb || !vce)
+ goto out;
+
+ rc = get_sevcb(vcssb, index, vcb);
+ if (rc)
+ goto out;
+
+ extract_vce_from_sevcb(vcb, vce);
+ rc = check_certificate_valid(vce);
+ if (rc)
+ goto out;
+
+ rc = create_key_from_vce(vcssb, vce, keyring);
+ if (rc)
+ goto out;
+
+ pr_dbf_msg("Successfully created key from Certificate Entry %d", index);
+out:
+ vfree(vce);
+ vfree(vcb);
+ return rc;
+}
+
+/*
+ * Request a single-entry VCB for each VCE available for the partition.
+ * Create a key from it and link it to cert_store keyring. If no keys
+ * could be created (i.e. VCEs were invalid) return -ENOKEY.
+ */
+static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring)
+{
+ int rc, index, count, added;
+
+ count = 0;
+ added = 0;
+	/* Certificate Store entry indices start with 1 and have no gaps. */
+ for (index = 1; index < vcssb->total_vc_index_count + 1; index++) {
+ pr_dbf_msg("Creating key from VCE %u", index);
+ rc = create_key_from_sevcb(vcssb, index, keyring);
+ count++;
+
+ if (rc == -EAGAIN)
+ return rc;
+
+ if (rc)
+ pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc);
+ else
+ added++;
+ }
+
+ if (added == 0) {
+ pr_dbf_msg("Processed %d entries. No keys created", count);
+ return -ENOKEY;
+ }
+
+ pr_info("Added %d of %d keys to cert_store keyring", added, count);
+
+ /*
+	 * Do not allow linking more keys to the certificate store keyring
+	 * after all the VCEs have been processed.
+ */
+ rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL);
+ if (rc)
+ pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc);
+
+ return 0;
+}
+
+/*
+ * Check which DIAG320 subcodes are installed.
+ * Return -ENOENT if subcodes 1 or 2 are not available.
+ */
+static int query_diag320_subcodes(void)
+{
+ unsigned long ism[ISM_LEN_DWORDS];
+ int rc;
+
+ rc = diag320(0, ism);
+ if (rc != DIAG320_RC_OK) {
+ pr_dbf_msg("DIAG320 subcode query returned %04x", rc);
+ return -ENOENT;
+ }
+
+ debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0");
+ debug_event(cert_store_hexdump, 3, ism, sizeof(ism));
+
+ if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) {
+ pr_dbf_msg("Not all required DIAG320 subcodes are installed");
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+/*
+ * Check if Certificate Store is supported by the firmware and DIAG320 subcodes
+ * 1 and 2 are installed. Create cert_store keyring and link all certificates
+ * available for the current partition to it as "cert_store_key" type
+ * keys. On refresh or error invalidate cert_store keyring and destroy
+ * all keys of "cert_store_key" type.
+ */
+static int fill_cs_keyring(void)
+{
+ struct key *cs_keyring;
+ struct vcssb *vcssb;
+ int rc;
+
+ rc = -ENOMEM;
+ vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL);
+ if (!vcssb)
+ goto cleanup_keys;
+
+ rc = -ENOENT;
+ if (!sclp.has_diag320) {
+ pr_dbf_msg("Certificate Store is not supported");
+ goto cleanup_keys;
+ }
+
+ rc = query_diag320_subcodes();
+ if (rc)
+ goto cleanup_keys;
+
+ rc = get_vcssb(vcssb);
+ if (rc)
+ goto cleanup_keys;
+
+ rc = -ENOMEM;
+ cs_keyring = create_cs_keyring();
+ if (!cs_keyring)
+ goto cleanup_keys;
+
+ rc = add_certificates_to_keyring(vcssb, cs_keyring);
+ if (rc)
+ goto cleanup_cs_keyring;
+
+ goto out;
+
+cleanup_cs_keyring:
+ key_put(cs_keyring);
+cleanup_keys:
+ cleanup_cs_keys();
+out:
+ kfree(vcssb);
+ return rc;
+}
+
+static DEFINE_MUTEX(cs_refresh_lock);
+static int cs_status_val = -1;
+
+static ssize_t cs_status_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ if (cs_status_val == -1)
+ return sysfs_emit(buf, "uninitialized\n");
+ else if (cs_status_val == 0)
+ return sysfs_emit(buf, "ok\n");
+
+ return sysfs_emit(buf, "failed (%d)\n", cs_status_val);
+}
+
+static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status);
+
+static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int rc, retries;
+
+ pr_dbf_msg("Refresh certificate store information requested");
+ rc = mutex_lock_interruptible(&cs_refresh_lock);
+ if (rc)
+ return rc;
+
+ for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) {
+ /* Request certificates from certificate store. */
+ rc = fill_cs_keyring();
+ if (rc)
+ pr_dbf_msg("Failed to refresh certificate store information (%d)", rc);
+ if (rc != -EAGAIN)
+ break;
+ }
+ cs_status_val = rc;
+ mutex_unlock(&cs_refresh_lock);
+
+ return rc ?: count;
+}
+
+static struct kobj_attribute refresh_attr = __ATTR_WO(refresh);
+
+static const struct attribute *cert_store_attrs[] __initconst = {
+ &cs_status_attr.attr,
+ &refresh_attr.attr,
+ NULL,
+};
+
+static struct kobject *cert_store_kobj;
+
+static int __init cert_store_init(void)
+{
+ int rc = -ENOMEM;
+
+ cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64);
+ if (!cert_store_dbf)
+ goto cleanup_dbf;
+
+ cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128);
+ if (!cert_store_hexdump)
+ goto cleanup_dbf;
+
+ debug_register_view(cert_store_hexdump, &debug_hex_ascii_view);
+ debug_register_view(cert_store_dbf, &debug_sprintf_view);
+
+ /* Create directory /sys/firmware/cert_store. */
+ cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj);
+ if (!cert_store_kobj)
+ goto cleanup_dbf;
+
+ rc = sysfs_create_files(cert_store_kobj, cert_store_attrs);
+ if (rc)
+ goto cleanup_kobj;
+
+ register_key_type(&key_type_cert_store_key);
+
+ return rc;
+
+cleanup_kobj:
+ kobject_put(cert_store_kobj);
+cleanup_dbf:
+ debug_unregister(cert_store_dbf);
+ debug_unregister(cert_store_hexdump);
+
+ return rc;
+}
+device_initcall(cert_store_init);
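
For illustration only (not part of this patch): cert_store_init() creates /sys/firmware/cert_store with a write-only "refresh" attribute and a read-only "cs_status" attribute. A hypothetical userspace snippet that triggers a refresh and reads the result back:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char status[64] = { 0 };
	int fd;

	/* Trigger a refresh of the cert_store keyring (needs appropriate privileges). */
	fd = open("/sys/firmware/cert_store/refresh", O_WRONLY);
	if (fd >= 0) {
		if (write(fd, "1", 1) < 0)
			perror("refresh");
		close(fd);
	}

	/* cs_status reports "uninitialized", "ok" or "failed (<rc>)". */
	fd = open("/sys/firmware/cert_store/cs_status", O_RDONLY);
	if (fd >= 0) {
		if (read(fd, status, sizeof(status) - 1) > 0)
			printf("cert_store status: %s", status);
		close(fd);
	}
	return 0;
}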
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index eee1ad3e1b29..5a86b9d1da71 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -24,11 +24,12 @@
#include <linux/tty.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <asm/vdso-symbols.h>
+#include <asm/access-regs.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
#include <asm/lowcore.h>
-#include <asm/switch_to.h>
-#include <asm/vdso.h>
+#include <asm/fpu.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
#include "entry.h"
@@ -55,7 +56,7 @@ typedef struct
static void store_sigregs(void)
{
save_access_regs(current->thread.acrs);
- save_fpu_regs();
+ save_user_fpu_regs();
}
/* Load registers after signal return */
@@ -78,7 +79,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+ fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.ufpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
return -EFAULT;
return 0;
@@ -98,10 +99,6 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
return -EINVAL;
- /* Test the floating-point-control word. */
- if (test_fp_ctl(user_sregs.fpregs.fpc))
- return -EINVAL;
-
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
@@ -116,7 +113,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
+ fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, &current->thread.ufpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -137,13 +134,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
return -EFAULT;
/* Save vector registers to signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = current->thread.ufpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -165,15 +162,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
*(__u32 *)&regs->gprs[i] = gprs_high[i];
/* Restore vector registers from signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ current->thread.ufpu.vxrs[i].low = vxrs[i];
}
return 0;
}
@@ -187,7 +184,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask))
goto badframe;
set_current_blocked(&set);
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -210,7 +207,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
@@ -265,7 +262,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
* the machine supports it
*/
frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
sizeof(frame->sregs_ext.vxrs_high);
frame = get_sigframe(&ksig->ka, regs, frame_size);
@@ -348,11 +345,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
* the machine supports it
*/
uc_flags = UC_GPRS_HIGH;
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
uc_flags |= UC_VXRS;
- } else
+ } else {
frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+ }
frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
return -EFAULT;
diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c
new file mode 100644
index 000000000000..4b9b34f95d72
--- /dev/null
+++ b/arch/s390/kernel/cpacf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "cpacf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <asm/cpacf.h>
+
+#define CPACF_QUERY(name, instruction) \
+static ssize_t name##_query_raw_read(struct file *fp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_mask_t mask; \
+ \
+ if (!cpacf_query(CPACF_##instruction, &mask)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \
+} \
+static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t))
+
+CPACF_QUERY(km, KM);
+CPACF_QUERY(kmc, KMC);
+CPACF_QUERY(kimd, KIMD);
+CPACF_QUERY(klmd, KLMD);
+CPACF_QUERY(kmac, KMAC);
+CPACF_QUERY(pckmo, PCKMO);
+CPACF_QUERY(kmf, KMF);
+CPACF_QUERY(kmctr, KMCTR);
+CPACF_QUERY(kmo, KMO);
+CPACF_QUERY(pcc, PCC);
+CPACF_QUERY(prno, PRNO);
+CPACF_QUERY(kma, KMA);
+CPACF_QUERY(kdsa, KDSA);
+
+#define CPACF_QAI(name, instruction) \
+static ssize_t name##_query_auth_info_raw_read( \
+ struct file *fp, struct kobject *kobj, \
+ const struct bin_attribute *attr, char *buf, loff_t offs, \
+ size_t count) \
+{ \
+ cpacf_qai_t qai; \
+ \
+ if (!cpacf_qai(CPACF_##instruction, &qai)) \
+ return -EOPNOTSUPP; \
+ return memory_read_from_buffer(buf, count, &offs, &qai, \
+ sizeof(qai)); \
+} \
+static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t))
+
+CPACF_QAI(km, KM);
+CPACF_QAI(kmc, KMC);
+CPACF_QAI(kimd, KIMD);
+CPACF_QAI(klmd, KLMD);
+CPACF_QAI(kmac, KMAC);
+CPACF_QAI(pckmo, PCKMO);
+CPACF_QAI(kmf, KMF);
+CPACF_QAI(kmctr, KMCTR);
+CPACF_QAI(kmo, KMO);
+CPACF_QAI(pcc, PCC);
+CPACF_QAI(prno, PRNO);
+CPACF_QAI(kma, KMA);
+CPACF_QAI(kdsa, KDSA);
+
+static const struct bin_attribute *const cpacf_attrs[] = {
+ &bin_attr_km_query_raw,
+ &bin_attr_kmc_query_raw,
+ &bin_attr_kimd_query_raw,
+ &bin_attr_klmd_query_raw,
+ &bin_attr_kmac_query_raw,
+ &bin_attr_pckmo_query_raw,
+ &bin_attr_kmf_query_raw,
+ &bin_attr_kmctr_query_raw,
+ &bin_attr_kmo_query_raw,
+ &bin_attr_pcc_query_raw,
+ &bin_attr_prno_query_raw,
+ &bin_attr_kma_query_raw,
+ &bin_attr_kdsa_query_raw,
+ &bin_attr_km_query_auth_info_raw,
+ &bin_attr_kmc_query_auth_info_raw,
+ &bin_attr_kimd_query_auth_info_raw,
+ &bin_attr_klmd_query_auth_info_raw,
+ &bin_attr_kmac_query_auth_info_raw,
+ &bin_attr_pckmo_query_auth_info_raw,
+ &bin_attr_kmf_query_auth_info_raw,
+ &bin_attr_kmctr_query_auth_info_raw,
+ &bin_attr_kmo_query_auth_info_raw,
+ &bin_attr_pcc_query_auth_info_raw,
+ &bin_attr_prno_query_auth_info_raw,
+ &bin_attr_kma_query_auth_info_raw,
+ &bin_attr_kdsa_query_auth_info_raw,
+ NULL,
+};
+
+static const struct attribute_group cpacf_attr_grp = {
+ .name = "cpacf",
+ .bin_attrs_new = cpacf_attrs,
+};
+
+static int __init cpacf_init(void)
+{
+ struct device *cpu_root;
+ int rc = 0;
+
+ cpu_root = bus_get_dev_root(&cpu_subsys);
+ if (cpu_root) {
+ rc = sysfs_create_group(&cpu_root->kobj, &cpacf_attr_grp);
+ put_device(cpu_root);
+ }
+ return rc;
+}
+device_initcall(cpacf_init);
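
For illustration only (not part of this patch): the attribute group registered above appears as /sys/devices/system/cpu/cpacf/, with one <instruction>_query_raw and one <instruction>_query_auth_info_raw file per CPACF instruction. A hypothetical userspace snippet that dumps the 16-byte KM query mask:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned char mask[16];	/* cpacf_mask_t is a 128-bit function mask */
	ssize_t n;
	int fd, i;

	fd = open("/sys/devices/system/cpu/cpacf/km_query_raw", O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, mask, sizeof(mask));	/* fails if the KM query is unsupported */
	close(fd);
	if (n < 0) {
		perror("read");
		return 1;
	}
	for (i = 0; i < n; i++)
		printf("%02x", mask[i]);
	printf("\n");
	return 0;
}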
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 72e106cfd8c7..2f4174b961de 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -16,10 +16,11 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/mm.h>
+#include <linux/io.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/cpcmd.h>
-#include <asm/io.h>
+#include <asm/asm.h>
static DEFINE_SPINLOCK(cpcmd_lock);
static char cpcmd_buf[241];
@@ -45,12 +46,11 @@ static int diag8_response(int cmdlen, char *response, int *rlen)
ry.odd = *rlen;
asm volatile(
" diag %[rx],%[ry],0x8\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [ry] "+&d" (ry.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [ry] "+d" (ry.pair)
: [rx] "d" (rx.pair)
- : "cc");
- if (cc)
+ : CC_CLOBBER);
+ if (CC_TRANSFORM(cc))
*rlen += ry.odd;
else
*rlen = ry.odd;
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 000000000000..76210f001028
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <asm/machine.h>
+#include <asm/elf.h>
+
+enum {
+ TYPE_HWCAP,
+ TYPE_FACILITY,
+ TYPE_MACHINE,
+};
+
+struct s390_cpu_feature {
+ unsigned int type : 4;
+ unsigned int num : 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+ [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+ [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+ [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+ [S390_CPU_FEATURE_D288] = {.type = TYPE_MACHINE, .num = MFEATURE_DIAG288},
+};
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ struct s390_cpu_feature *feature;
+
+ if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+ return 0;
+ feature = &s390_cpu_features[num];
+ switch (feature->type) {
+ case TYPE_HWCAP:
+ return !!(elf_hwcap & BIT(feature->num));
+ case TYPE_FACILITY:
+ return test_facility(feature->num);
+ case TYPE_MACHINE:
+ return test_machine_feature(feature->num);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(cpu_have_feature);
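
For illustration only (not part of this patch): cpu_have_feature() is what module_cpu_feature_match() uses to gate module loading on a CPU feature, so a module can depend on one of the features mapped above. A hypothetical example:

#include <linux/cpufeature.h>
#include <linux/module.h>

/* Hypothetical module that only loads when the vector facility is available. */
static int __init vx_demo_init(void)
{
	pr_info("vector facility present\n");
	return 0;
}

static void __exit vx_demo_exit(void)
{
}

module_cpu_feature_match(S390_CPU_FEATURE_VXRS, vx_demo_init);
module_exit(vx_demo_exit);
MODULE_LICENSE("GPL");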
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..adb164223f8c 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -21,6 +21,8 @@
#include <asm/elf.h>
#include <asm/ipl.h>
#include <asm/sclp.h>
+#include <asm/maccess.h>
+#include <asm/fpu.h>
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -45,7 +47,7 @@ struct save_area {
u64 fprs[16];
u32 fpc;
u32 prefix;
- u64 todpreg;
+ u32 todpreg;
u64 timer;
u64 todcmp;
u64 vxrs_low[16];
@@ -61,9 +63,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
{
struct save_area *sa;
- sa = memblock_alloc(sizeof(*sa), 8);
- if (!sa)
- panic("Failed to allocate save area\n");
+ sa = memblock_alloc_or_panic(sizeof(*sa), 8);
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -109,43 +109,20 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
/* Copy lower halves of vector registers 0-15 */
for (i = 0; i < 16; i++)
- memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8);
+ sa->vxrs_low[i] = vxrs[i].low;
/* Copy vector registers 16-31 */
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
-/*
- * Return physical address for virtual address
- */
-static inline void *load_real_addr(void *addr)
-{
- unsigned long real_addr;
-
- asm volatile(
- " lra %0,0(%1)\n"
- " jz 0f\n"
- " la %0,0\n"
- "0:"
- : "=a" (real_addr) : "a" (addr) : "cc");
- return (void *)real_addr;
-}
-
-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long len;
- void *ra;
- int rc;
+ size_t len, copied, res = 0;
while (count) {
if (!oldmem_data.start && src < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_kernel(dst, src, len);
- if (rc)
- return rc;
+ copied = memcpy_hsa_iter(iter, src, len);
} else {
/* Check for swapped kdump oldmem areas */
if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
@@ -157,56 +134,27 @@ int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
} else {
len = count;
}
- if (is_vmalloc_or_module_addr(dst)) {
- ra = load_real_addr(dst);
- len = min(PAGE_SIZE - offset_in_page(ra), len);
- } else {
- ra = dst;
- }
- if (memcpy_real(ra, src, len))
- return -EFAULT;
+ copied = memcpy_real_iter(iter, src, len);
}
- dst += len;
- src += len;
- count -= len;
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- return 0;
+ return res;
}
-/*
- * Copy memory of the old, dumped system to a user space virtual address
- */
-static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
{
- unsigned long len;
- int rc;
+ struct iov_iter iter;
+ struct kvec kvec;
- while (count) {
- if (!oldmem_data.start && src < sclp.hsa_size) {
- /* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_user(dst, src, len);
- if (rc)
- return rc;
- } else {
- /* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
- src -= oldmem_data.start;
- len = min(count, oldmem_data.size - src);
- } else if (oldmem_data.start && src < oldmem_data.size) {
- len = min(count, oldmem_data.size - src);
- src += oldmem_data.start;
- } else {
- len = count;
- }
- rc = copy_to_user_real(dst, src, count);
- if (rc)
- return rc;
- }
- dst += len;
- src += len;
- count -= len;
- }
+ kvec.iov_base = dst;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
+ if (copy_oldmem_iter(&iter, src, count) < count)
+ return -EFAULT;
return 0;
}
@@ -217,26 +165,9 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long offset)
{
unsigned long src;
- int rc;
- if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
- return -EINVAL;
- /* Multi-segment iterators are not supported */
- if (iter->nr_segs > 1)
- return -EINVAL;
- if (!csize)
- return 0;
src = pfn_to_phys(pfn) + offset;
-
- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
- if (rc < 0)
- return rc;
- iov_iter_advance(iter, csize);
- return csize;
+ return copy_oldmem_iter(iter, src, csize);
}
/*
@@ -304,14 +235,16 @@ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
prot);
}
-static const char *nt_name(Elf64_Word type)
+/*
+ * Return true only when in a kdump or stand-alone kdump environment.
+ * Note that /proc/vmcore might also be available in "standard zfcp/nvme dump"
+ * environments, where this function returns false; see dump_available().
+ */
+bool is_kdump_kernel(void)
{
- const char *name = "LINUX";
-
- if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG)
- name = KEXEC_CORE_NOTE_NAME;
- return name;
+ return oldmem_data.start;
}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
/*
* Initialize ELF note
@@ -337,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len,
return PTR_ADD(buf, len);
}
-static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len)
-{
- return nt_init_name(buf, type, desc, d_len, nt_name(type));
-}
+#define nt_init(buf, type, desc) \
+ nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type)
/*
* Calculate the size of ELF note
@@ -356,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name)
return size;
}
-static inline size_t nt_size(Elf64_Word type, int d_len)
-{
- return nt_size_name(d_len, nt_name(type));
-}
+#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type)
/*
* Fill ELF notes for one CPU with save area registers
@@ -380,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa)
memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc));
memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs));
/* Create ELF notes for the CPU */
- ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus));
- ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset));
- ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer));
- ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp));
- ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg));
- ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs));
- ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix));
- if (MACHINE_HAS_VX) {
- ptr = nt_init(ptr, NT_S390_VXRS_HIGH,
- &sa->vxrs_high, sizeof(sa->vxrs_high));
- ptr = nt_init(ptr, NT_S390_VXRS_LOW,
- &sa->vxrs_low, sizeof(sa->vxrs_low));
+ ptr = nt_init(ptr, PRSTATUS, nt_prstatus);
+ ptr = nt_init(ptr, PRFPREG, nt_fpregset);
+ ptr = nt_init(ptr, S390_TIMER, sa->timer);
+ ptr = nt_init(ptr, S390_TODCMP, sa->todcmp);
+ ptr = nt_init(ptr, S390_TODPREG, sa->todpreg);
+ ptr = nt_init(ptr, S390_CTRS, sa->ctrs);
+ ptr = nt_init(ptr, S390_PREFIX, sa->prefix);
+ if (cpu_has_vx()) {
+ ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high);
+ ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low);
}
return ptr;
}
@@ -404,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void)
struct save_area *sa = NULL;
size_t size;
- size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus));
- size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t));
- size += nt_size(NT_S390_TIMER, sizeof(sa->timer));
- size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp));
- size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg));
- size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs));
- size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix));
- if (MACHINE_HAS_VX) {
- size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high));
- size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low));
+ size = nt_size(PRSTATUS, struct elf_prstatus);
+ size += nt_size(PRFPREG, elf_fpregset_t);
+ size += nt_size(S390_TIMER, sa->timer);
+ size += nt_size(S390_TODCMP, sa->todcmp);
+ size += nt_size(S390_TODPREG, sa->todpreg);
+ size += nt_size(S390_CTRS, sa->ctrs);
+ size += nt_size(S390_PREFIX, sa->prefix);
+ if (cpu_has_vx()) {
+ size += nt_size(S390_VXRS_HIGH, sa->vxrs_high);
+ size += nt_size(S390_VXRS_LOW, sa->vxrs_low);
}
return size;
@@ -428,8 +354,8 @@ static void *nt_prpsinfo(void *ptr)
memset(&prpsinfo, 0, sizeof(prpsinfo));
prpsinfo.pr_sname = 'R';
- strcpy(prpsinfo.pr_fname, "vmlinux");
- return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo));
+ strscpy(prpsinfo.pr_fname, "vmlinux");
+ return nt_init(ptr, PRPSINFO, prpsinfo);
}
/*
@@ -518,7 +444,7 @@ static void *nt_final(void *ptr)
/*
* Initialize ELF header (new kernel)
*/
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
{
memset(ehdr, 0, sizeof(*ehdr));
memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -532,7 +458,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
ehdr->e_phoff = sizeof(Elf64_Ehdr);
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- ehdr->e_phnum = mem_chunk_cnt + 1;
+ /* Number of PT_LOAD program headers plus PT_NOTE program header */
+ ehdr->e_phnum = phdr_count + 1;
return ehdr + 1;
}
@@ -563,27 +490,77 @@ static int get_mem_chunk_cnt(void)
return cnt;
}
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+ unsigned long vaddr, unsigned long size)
+{
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = vaddr;
+ phdr->p_offset = paddr;
+ phdr->p_paddr = paddr;
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize ELF loads (new kernel)
*/
-static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
{
+ unsigned long old_identity_base = 0;
phys_addr_t start, end;
u64 idx;
+ if (os_info_has_vm)
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
- phdr->p_filesz = end - start;
- phdr->p_type = PT_LOAD;
- phdr->p_offset = start;
- phdr->p_vaddr = start;
- phdr->p_paddr = start;
- phdr->p_memsz = end - start;
- phdr->p_flags = PF_R | PF_W | PF_X;
- phdr->p_align = PAGE_SIZE;
+ fill_ptload(phdr, start, old_identity_base + start,
+ end - start);
phdr++;
}
}
+static bool os_info_has_vm(void)
+{
+ return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size)
+{
+ unsigned long old_identity_base = 0;
+
+ if (os_info_has_vm())
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
+/*
+ * Prepare PT_LOAD type program header for kernel image region
+ */
+static void text_init(Elf64_Phdr *phdr)
+{
+ unsigned long start_phys = os_info_old_value(OS_INFO_IMAGE_PHYS);
+ unsigned long start = os_info_old_value(OS_INFO_IMAGE_START);
+ unsigned long end = os_info_old_value(OS_INFO_IMAGE_END);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = start;
+ phdr->p_filesz = end - start;
+ phdr->p_memsz = end - start;
+ phdr->p_offset = start_phys;
+ phdr->p_paddr = start_phys;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize notes (new kernel)
*/
@@ -609,7 +586,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
return ptr;
}
-static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+static size_t get_elfcorehdr_size(int phdr_count)
{
size_t size;
@@ -617,7 +594,7 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
/* PT_NOTES */
size += sizeof(Elf64_Phdr);
/* nt_prpsinfo */
- size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo));
+ size += nt_size(PRPSINFO, struct elf_prpsinfo);
/* regsets */
size += get_cpu_cnt() * get_cpu_elf_notes_size();
/* nt_vmcoreinfo */
@@ -625,7 +602,7 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
/* nt_final */
size += sizeof(Elf64_Nhdr);
/* PT_LOADS */
- size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+ size += phdr_count * sizeof(Elf64_Phdr);
return size;
}
@@ -635,10 +612,10 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
*/
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
- Elf64_Phdr *phdr_notes, *phdr_loads;
- int mem_chunk_cnt;
+ Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+ int mem_chunk_cnt, phdr_text_cnt;
+ size_t alloc_size;
void *ptr, *hdr;
- u32 alloc_size;
u64 hdr_off;
/* If we are not in kdump or zfcp/nvme dump mode return */
@@ -656,12 +633,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
}
mem_chunk_cnt = get_mem_chunk_cnt();
+ phdr_text_cnt = os_info_has_vm() ? 1 : 0;
- alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
hdr = kzalloc(alloc_size, GFP_KERNEL);
- /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ /*
+ * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
* a dump with this crash kernel will fail. Panic now to allow other
* dump mechanisms to take over.
*/
@@ -669,18 +648,25 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
panic("s390 kdump allocating elfcorehdr failed");
/* Init elf header */
- ptr = ehdr_init(hdr, mem_chunk_cnt);
+ phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
/* Init program headers */
- phdr_notes = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_loads = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ if (phdr_text_cnt) {
+ phdr_text = phdr_notes + 1;
+ phdr_loads = phdr_text + 1;
+ } else {
+ phdr_loads = phdr_notes + 1;
+ }
+ ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
/* Init notes */
hdr_off = PTR_DIFF(ptr, hdr);
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init kernel text program header */
+ if (phdr_text_cnt)
+ text_init(phdr_text);
/* Init loads */
+ loads_init(phdr_loads, phdr_text_cnt);
+ /* Finalize program headers */
hdr_off = PTR_DIFF(ptr, hdr);
- loads_init(phdr_loads, hdr_off);
*addr = (unsigned long long) hdr;
*size = (unsigned long long) hdr_off;
BUG_ON(elfcorehdr_size > alloc_size);
diff --git a/arch/s390/kernel/ctlreg.c b/arch/s390/kernel/ctlreg.c
new file mode 100644
index 000000000000..8cc26cf2c64a
--- /dev/null
+++ b/arch/s390/kernel/ctlreg.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/irqflags.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+#include <asm/abs_lowcore.h>
+#include <asm/ctlreg.h>
+
+/*
+ * ctl_lock guards access to global control register contents which
+ * are kept in the control register save area within absolute lowcore
+ * at physical address zero.
+ */
+static DEFINE_SPINLOCK(system_ctl_lock);
+
+void system_ctlreg_lock(void)
+ __acquires(&system_ctl_lock)
+{
+ spin_lock(&system_ctl_lock);
+}
+
+void system_ctlreg_unlock(void)
+ __releases(&system_ctl_lock)
+{
+ spin_unlock(&system_ctl_lock);
+}
+
+static bool system_ctlreg_area_init __ro_after_init;
+
+void __init system_ctlreg_init_save_area(struct lowcore *lc)
+{
+ struct lowcore *abs_lc;
+
+ abs_lc = get_abs_lowcore();
+ __local_ctl_store(0, 15, lc->cregs_save_area);
+ __local_ctl_store(0, 15, abs_lc->cregs_save_area);
+ put_abs_lowcore(abs_lc);
+ system_ctlreg_area_init = true;
+}
+
+struct ctlreg_parms {
+ unsigned long andval;
+ unsigned long orval;
+ unsigned long val;
+ int request;
+ int cr;
+};
+
+static void ctlreg_callback(void *info)
+{
+ struct ctlreg_parms *pp = info;
+ struct ctlreg regs[16];
+
+ __local_ctl_store(0, 15, regs);
+ if (pp->request == CTLREG_LOAD) {
+ regs[pp->cr].val = pp->val;
+ } else {
+ regs[pp->cr].val &= pp->andval;
+ regs[pp->cr].val |= pp->orval;
+ }
+ __local_ctl_load(0, 15, regs);
+}
+
+static void system_ctlreg_update(void *info)
+{
+ unsigned long flags;
+
+ if (system_state == SYSTEM_BOOTING) {
+ /*
+ * For very early calls do not call on_each_cpu()
+ * since not everything might be set up.
+ */
+ local_irq_save(flags);
+ ctlreg_callback(info);
+ local_irq_restore(flags);
+ } else {
+ on_each_cpu(ctlreg_callback, info, 1);
+ }
+}
+
+void system_ctlreg_modify(unsigned int cr, unsigned long data, int request)
+{
+ struct ctlreg_parms pp = { .cr = cr, .request = request, };
+ struct lowcore *abs_lc;
+
+ switch (request) {
+ case CTLREG_SET_BIT:
+ pp.orval = 1UL << data;
+ pp.andval = -1UL;
+ break;
+ case CTLREG_CLEAR_BIT:
+ pp.orval = 0;
+ pp.andval = ~(1UL << data);
+ break;
+ case CTLREG_LOAD:
+ pp.val = data;
+ break;
+ }
+ if (system_ctlreg_area_init) {
+ system_ctlreg_lock();
+ abs_lc = get_abs_lowcore();
+ if (request == CTLREG_LOAD) {
+ abs_lc->cregs_save_area[cr].val = pp.val;
+ } else {
+ abs_lc->cregs_save_area[cr].val &= pp.andval;
+ abs_lc->cregs_save_area[cr].val |= pp.orval;
+ }
+ put_abs_lowcore(abs_lc);
+ system_ctlreg_update(&pp);
+ system_ctlreg_unlock();
+ } else {
+ system_ctlreg_update(&pp);
+ }
+}
+EXPORT_SYMBOL(system_ctlreg_modify);
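A minimal usage sketch, not part of this file: the exported interface updates a control register on all CPUs and keeps the save area in the absolute lowcore consistent. The bit number below is a made-up example.

static void example_set_and_clear_cr0_bit(void)
{
	/* Set bit 33 (example value) in control register 0 on all CPUs ... */
	system_ctlreg_modify(0, 33, CTLREG_SET_BIT);
	/* ... and clear it again once it is no longer needed. */
	system_ctlreg_modify(0, 33, CTLREG_CLEAR_BIT);
}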
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4331c7e6e1c0..2a41be2f7925 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/math.h>
#include <linux/minmax.h>
#include <linux/debugfs.h>
@@ -38,13 +39,13 @@
typedef struct file_private_info {
loff_t offset; /* offset of last read in file */
- int act_area; /* number of last formated area */
+ int act_area; /* number of last formatted area */
int act_page; /* act page in given area */
- int act_entry; /* last formated entry (offset */
+ int act_entry; /* last formatted entry (offset */
/* relative to beginning of last */
- /* formated page) */
+ /* formatted page) */
size_t act_entry_offset; /* up to this offset we copied */
- /* in last read the last formated */
+ /* in last read the last formatted */
/* entry to userland */
char temp_buf[2048]; /* buffer for output */
debug_info_t *debug_info_org; /* original debug information */
@@ -60,10 +61,10 @@ typedef struct {
* except of floats, and long long (32 bit)
*
*/
- long args[0];
+ long args[];
} debug_sprintf_entry_t;
-/* internal function prototyes */
+/* internal function prototypes */
static int debug_init(void);
static ssize_t debug_output(struct file *file, char __user *user_buf,
@@ -77,12 +78,14 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area,
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
static int debug_prolog_level_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf);
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
static int debug_input_level_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
static int debug_prolog_pages_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf);
+ struct debug_view *view, char *out_buf,
+ size_t out_buf_size);
static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
@@ -90,9 +93,8 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view,
struct file *file, const char __user *user_buf,
size_t user_buf_size, loff_t *offset);
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf);
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, debug_sprintf_entry_t *curr_event);
+ char *out_buf, size_t out_buf_size,
+ const char *in_buf);
static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
static void debug_events_append(debug_info_t *dest, debug_info_t *src);
@@ -139,7 +141,7 @@ struct debug_view debug_sprintf_view = {
"sprintf",
NULL,
&debug_dflt_header_fn,
- (debug_format_proc_t *)&debug_sprintf_format_fn,
+ &debug_sprintf_format_fn,
NULL,
NULL
};
@@ -163,7 +165,6 @@ static const struct file_operations debug_file_ops = {
.write = debug_input,
.open = debug_open,
.release = debug_close,
- .llseek = no_llseek,
};
static struct dentry *debug_debugfs_root_entry;
@@ -250,7 +251,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
rc->level = level;
rc->buf_size = buf_size;
rc->entry_size = sizeof(debug_entry_t) + buf_size;
- strlcpy(rc->name, name, sizeof(rc->name));
+ strscpy(rc->name, name);
memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
refcount_set(&(rc->ref_count), 0);
@@ -351,7 +352,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode)
for (i = 0; i < in->nr_areas; i++) {
for (j = 0; j < in->pages_per_area; j++)
memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE);
+ rc->active_pages[i] = in->active_pages[i];
+ rc->active_entries[i] = in->active_entries[i];
}
+ rc->active_area = in->active_area;
out:
spin_unlock_irqrestore(&in->lock, flags);
return rc;
@@ -381,7 +385,7 @@ static void debug_info_put(debug_info_t *db_info)
/*
* debug_format_entry:
- * - format one debug entry and return size of formated data
+ * - format one debug entry and return size of formatted data
*/
static int debug_format_entry(file_private_info_t *p_info)
{
@@ -392,8 +396,10 @@ static int debug_format_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
/* print prolog */
- if (view->prolog_proc)
- len += view->prolog_proc(id_snap, view, p_info->temp_buf);
+ if (view->prolog_proc) {
+ len += view->prolog_proc(id_snap, view, p_info->temp_buf,
+ sizeof(p_info->temp_buf));
+ }
goto out;
}
if (!id_snap->areas) /* this is true, if we have a prolog only view */
@@ -403,21 +409,31 @@ static int debug_format_entry(file_private_info_t *p_info)
if (act_entry->clock == 0LL)
goto out; /* empty entry */
- if (view->header_proc)
+ if (view->header_proc) {
len += view->header_proc(id_snap, view, p_info->act_area,
- act_entry, p_info->temp_buf + len);
- if (view->format_proc)
+ act_entry, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len);
+ }
+ if (view->format_proc) {
len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+ sizeof(p_info->temp_buf) - len,
DEBUG_DATA(act_entry));
+ }
out:
return len;
}
-/*
- * debug_next_entry:
- * - goto next entry in p_info
+/**
+ * debug_next_entry - Go to the next entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the next entry. If no further entry
+ * exists, the current position is set to one after the end and the return value
+ * indicates that no further entries exist.
+ *
+ * Return: True if there are more following entries, false otherwise
*/
-static inline int debug_next_entry(file_private_info_t *p_info)
+static inline bool debug_next_entry(file_private_info_t *p_info)
{
debug_info_t *id;
@@ -425,10 +441,10 @@ static inline int debug_next_entry(file_private_info_t *p_info)
if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
p_info->act_entry = 0;
p_info->act_page = 0;
- goto out;
+ return true;
}
if (!id->areas)
- return 1;
+ return false;
p_info->act_entry += id->entry_size;
/* switch to next page, if we reached the end of the page */
if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) {
@@ -441,16 +457,93 @@ static inline int debug_next_entry(file_private_info_t *p_info)
p_info->act_page = 0;
}
if (p_info->act_area >= id->nr_areas)
- return 1;
+ return false;
}
-out:
- return 0;
+ return true;
+}
+
+/**
+ * debug_to_act_entry - Go to the currently active entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the currently active
+ * entry of @p_info->debug_info_snap
+ */
+static void debug_to_act_entry(file_private_info_t *p_info)
+{
+ debug_info_t *snap_id;
+
+ snap_id = p_info->debug_info_snap;
+ p_info->act_area = snap_id->active_area;
+ p_info->act_page = snap_id->active_pages[snap_id->active_area];
+ p_info->act_entry = snap_id->active_entries[snap_id->active_area];
+}
+
+/**
+ * debug_prev_entry - Go to the previous entry
+ * @p_info: Private info that is manipulated
+ *
+ * Sets the current position in @p_info to the previous entry. If no previous entry
+ * exists, the current position is left at DEBUG_PROLOG_ENTRY and the return value
+ * indicates that no previous entries exist.
+ *
+ * Return: True if there are more previous entries, false otherwise
+ */
+
+static inline bool debug_prev_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if (p_info->act_entry == DEBUG_PROLOG_ENTRY)
+ debug_to_act_entry(p_info);
+ if (!id->areas)
+ return false;
+ p_info->act_entry -= id->entry_size;
+ /* switch to prev page, if we reached the beginning of the page */
+ if (p_info->act_entry < 0) {
+ /* end of previous page */
+ p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size;
+ p_info->act_page--;
+ if (p_info->act_page < 0) {
+ /* previous area */
+ p_info->act_area--;
+ p_info->act_page = id->pages_per_area - 1;
+ }
+ if (p_info->act_area < 0)
+ p_info->act_area = (id->nr_areas - 1) % id->nr_areas;
+ }
+ /* check full circle */
+ if (id->active_area == p_info->act_area &&
+ id->active_pages[id->active_area] == p_info->act_page &&
+ id->active_entries[id->active_area] == p_info->act_entry)
+ return false;
+ return true;
+}
+
+/**
+ * debug_move_entry - Go to next entry in either the forward or backward direction
+ * @p_info: Private info that is manipulated
+ * @reverse: If true, go to the next entry in reverse, i.e. the previous one
+ *
+ * Sets the current position in @p_info to the next (@reverse == false) or
+ * previous (@reverse == true) entry.
+ *
+ * Return: True if there are further entries in that direction,
+ * false otherwise.
+ */
+static bool debug_move_entry(file_private_info_t *p_info, bool reverse)
+{
+ if (reverse)
+ return debug_prev_entry(p_info);
+ else
+ return debug_next_entry(p_info);
}
/*
* debug_output:
* - called for user read()
- * - copies formated debug entries to the user buffer
+ * - copies formatted debug entries to the user buffer
*/
static ssize_t debug_output(struct file *file, /* file descriptor */
char __user *user_buf, /* user buffer */
@@ -486,7 +579,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */
}
if (copy_size == formatted_line_residue) {
entry_offset = 0;
- if (debug_next_entry(p_info))
+ if (!debug_next_entry(p_info))
goto out;
}
}
@@ -521,15 +614,51 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
return rc; /* number of input characters */
}
+static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info,
+ struct debug_view *view)
+{
+ debug_info_t *debug_info_snapshot;
+ file_private_info_t *p_info;
+
+ /*
+ * Make a snapshot of the current debug areas to get a consistent view.
+ * Copying all the areas is only needed if we have a view which
+ * formats the debug areas.
+ */
+ if (!view->format_proc && !view->header_proc)
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ else
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+
+ if (!debug_info_snapshot)
+ return NULL;
+ p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ if (!p_info) {
+ debug_info_free(debug_info_snapshot);
+ return NULL;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = view;
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ debug_info_get(debug_info);
+
+ return p_info;
+}
+
/*
* debug_open:
* - called for user open()
- * - copies formated output to private_data area of the file
+ * - copies formatted output to private_data area of the file
* handle
*/
static int debug_open(struct inode *inode, struct file *file)
{
- debug_info_t *debug_info, *debug_info_snapshot;
+ debug_info_t *debug_info;
file_private_info_t *p_info;
int i, rc = 0;
@@ -547,42 +676,26 @@ static int debug_open(struct inode *inode, struct file *file)
goto out;
found:
-
- /* Make snapshot of current debug areas to get it consistent. */
- /* To copy all the areas is only needed, if we have a view which */
- /* formats the debug areas. */
-
- if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc)
- debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
- else
- debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
-
- if (!debug_info_snapshot) {
- rc = -ENOMEM;
- goto out;
- }
- p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL);
+ p_info = debug_file_private_alloc(debug_info, debug_info->views[i]);
if (!p_info) {
- debug_info_free(debug_info_snapshot);
rc = -ENOMEM;
goto out;
}
- p_info->offset = 0;
- p_info->debug_info_snap = debug_info_snapshot;
- p_info->debug_info_org = debug_info;
- p_info->view = debug_info->views[i];
- p_info->act_area = 0;
- p_info->act_page = 0;
- p_info->act_entry = DEBUG_PROLOG_ENTRY;
- p_info->act_entry_offset = 0;
file->private_data = p_info;
- debug_info_get(debug_info);
nonseekable_open(inode, file);
out:
mutex_unlock(&debug_mutex);
return rc;
}
+static void debug_file_private_free(file_private_info_t *p_info)
+{
+ if (p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(p_info);
+}
+
/*
* debug_close:
* - called for user close()
@@ -593,13 +706,59 @@ static int debug_close(struct inode *inode, struct file *file)
file_private_info_t *p_info;
p_info = (file_private_info_t *) file->private_data;
- if (p_info->debug_info_snap)
- debug_info_free(p_info->debug_info_snap);
- debug_info_put(p_info->debug_info_org);
- kfree(file->private_data);
+ debug_file_private_free(p_info);
+ file->private_data = NULL;
return 0; /* success */
}
+/**
+ * debug_dump - Get a textual representation of debug info, or as much as fits
+ * @id: Debug information to use
+ * @view: View with which to dump the debug information
+ * @buf: Buffer the textual debug data representation is written to
+ * @buf_size: Size of the buffer, including the trailing '\0' byte
+ * @reverse: Go backwards from the last written entry
+ *
+ * This function may be used whenever a textual representation of the debug
+ * information is required without using an s390dbf file.
+ *
+ * Note: It is the caller's responsibility to supply a view that is compatible
+ * with the debug information data.
+ *
+ * Return: On success returns the number of bytes written to the buffer not
+ * including the trailing '\0' byte. If @buf_size == 0 the function returns 0.
+ * On failure an error code less than 0 is returned.
+ */
+ssize_t debug_dump(debug_info_t *id, struct debug_view *view,
+ char *buf, size_t buf_size, bool reverse)
+{
+ file_private_info_t *p_info;
+ size_t size, offset = 0;
+
+ /* Need space for '\0' byte */
+ if (buf_size < 1)
+ return 0;
+ buf_size--;
+
+ p_info = debug_file_private_alloc(id, view);
+ if (!p_info)
+ return -ENOMEM;
+
+ /* There is always at least the DEBUG_PROLOG_ENTRY */
+ do {
+ size = debug_format_entry(p_info);
+ size = min(size, buf_size - offset);
+ memcpy(buf + offset, p_info->temp_buf, size);
+ offset += size;
+ if (offset >= buf_size)
+ break;
+ } while (debug_move_entry(p_info, reverse));
+ debug_file_private_free(p_info);
+ buf[offset] = '\0';
+
+ return offset;
+}
+
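A hedged usage sketch for the new debug_dump() helper (the debug_info_t handle and its registration are assumed to exist elsewhere; the buffer size is arbitrary): dump the newest entries of an s390dbf log in sprintf format, walking backwards from the last written entry.

static void example_dump_newest_entries(debug_info_t *dbf)
{
	char buf[1024];
	ssize_t len;

	/* reverse == true: start at the last written entry and go backwards */
	len = debug_dump(dbf, &debug_sprintf_view, buf, sizeof(buf), true);
	if (len >= 0)
		pr_info("s390dbf dump (%zd bytes):\n%s", len, buf);
}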
/* Create debugfs entries and add to internal list. */
static void _debug_register(debug_info_t *id)
{
@@ -954,7 +1113,7 @@ static int debug_active = 1;
* always allow read, allow write only if debug_stoppable is set or
* if debug_active is already off
*/
-static int s390dbf_procactive(struct ctl_table *table, int write,
+static int s390dbf_procactive(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
@@ -963,7 +1122,7 @@ static int s390dbf_procactive(struct ctl_table *table, int write,
return 0;
}
-static struct ctl_table s390dbf_table[] = {
+static const struct ctl_table s390dbf_table[] = {
{
.procname = "debug_stoppable",
.data = &debug_stoppable,
@@ -978,17 +1137,6 @@ static struct ctl_table s390dbf_table[] = {
.mode = S_IRUGO | S_IWUSR,
.proc_handler = s390dbf_procactive,
},
- { }
-};
-
-static struct ctl_table s390dbf_dir_table[] = {
- {
- .procname = "s390dbf",
- .maxlen = 0,
- .mode = S_IRUGO | S_IXUGO,
- .child = s390dbf_table,
- },
- { }
};
static struct ctl_table_header *s390dbf_sysctl_header;
@@ -1304,9 +1452,9 @@ static inline int debug_get_uint(char *buf)
*/
static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf)
+ char *out_buf, size_t out_buf_size)
{
- return sprintf(out_buf, "%i\n", id->pages_per_area);
+ return scnprintf(out_buf, out_buf_size, "%i\n", id->pages_per_area);
}
/*
@@ -1353,14 +1501,14 @@ out:
* prints out actual debug level
*/
static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf)
+ char *out_buf, size_t out_buf_size)
{
int rc = 0;
if (id->level == DEBUG_OFF_LEVEL)
- rc = sprintf(out_buf, "-\n");
+ rc = scnprintf(out_buf, out_buf_size, "-\n");
else
- rc = sprintf(out_buf, "%i\n", id->level);
+ rc = scnprintf(out_buf, out_buf_size, "%i\n", id->level);
return rc;
}
@@ -1477,22 +1625,24 @@ out:
* prints debug data in hex/ascii format
*/
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf)
+ char *out_buf, size_t out_buf_size, const char *in_buf)
{
int i, rc = 0;
- for (i = 0; i < id->buf_size; i++)
- rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]);
- rc += sprintf(out_buf + rc, "| ");
+ for (i = 0; i < id->buf_size; i++) {
+ rc += scnprintf(out_buf + rc, out_buf_size - rc,
+ "%02x ", ((unsigned char *)in_buf)[i]);
+ }
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "| ");
for (i = 0; i < id->buf_size; i++) {
unsigned char c = in_buf[i];
if (isascii(c) && isprint(c))
- rc += sprintf(out_buf + rc, "%c", c);
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "%c", c);
else
- rc += sprintf(out_buf + rc, ".");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, ".");
}
- rc += sprintf(out_buf + rc, "\n");
+ rc += scnprintf(out_buf + rc, out_buf_size - rc, "\n");
return rc;
}
@@ -1500,7 +1650,8 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
* prints header for debug entry
*/
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf)
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
{
unsigned long sec, usec;
unsigned long caller;
@@ -1517,23 +1668,24 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ",
- area, sec, usec, level, except_str,
- entry->cpu, (void *)caller);
+ rc += scnprintf(out_buf, out_buf_size, "%02i %011ld:%06lu %1u %1s %04u %px ",
+ area, sec, usec, level, except_str,
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
/*
- * prints debug data sprintf-formated:
+ * prints debug data sprintf-formatted:
* debug_sprinf_event/exception calls must be used together with this view
*/
#define DEBUG_SPRINTF_MAX_ARGS 10
-static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, debug_sprintf_entry_t *curr_event)
+int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size, const char *inbuf)
{
+ debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf;
int num_longs, num_used_args = 0, i, rc = 0;
int index[DEBUG_SPRINTF_MAX_ARGS];
@@ -1544,8 +1696,9 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
goto out; /* bufsize of entry too small */
if (num_longs == 1) {
/* no args, we use only the string */
- strcpy(out_buf, curr_event->string);
- rc = strlen(curr_event->string);
+ rc = strscpy(out_buf, curr_event->string, out_buf_size);
+ if (rc == -E2BIG)
+ rc = out_buf_size;
goto out;
}
@@ -1557,15 +1710,17 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
for (i = 0; i < num_used_args; i++)
index[i] = i;
- rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
- curr_event->args[index[1]], curr_event->args[index[2]],
- curr_event->args[index[3]], curr_event->args[index[4]],
- curr_event->args[index[5]], curr_event->args[index[6]],
- curr_event->args[index[7]], curr_event->args[index[8]],
- curr_event->args[index[9]]);
+ rc = scnprintf(out_buf, out_buf_size,
+ curr_event->string, curr_event->args[index[0]],
+ curr_event->args[index[1]], curr_event->args[index[2]],
+ curr_event->args[index[3]], curr_event->args[index[4]],
+ curr_event->args[index[5]], curr_event->args[index[6]],
+ curr_event->args[index[7]], curr_event->args[index[8]],
+ curr_event->args[index[9]]);
out:
return rc;
}
+EXPORT_SYMBOL(debug_sprintf_format_fn);
/*
* debug_init:
@@ -1573,7 +1728,7 @@ out:
*/
static int __init debug_init(void)
{
- s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
+ s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table);
mutex_lock(&debug_mutex);
debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
initialized = 1;
diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile
new file mode 100644
index 000000000000..956aee6c4090
--- /dev/null
+++ b/arch/s390/kernel/diag/Makefile
@@ -0,0 +1 @@
+obj-y := diag_misc.o diag324.o diag.o diag310.o
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c
index a778714e4d8b..56b862ba9be8 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag/diag.c
@@ -11,11 +11,13 @@
#include <linux/cpu.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/trace/diag.h>
#include <asm/sections.h>
-#include "entry.h"
+#include <asm/asm.h>
+#include "../entry.h"
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -35,6 +37,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+ [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" },
[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
@@ -48,7 +51,11 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+ [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" },
[DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
+ [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
+ [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" },
+ [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" },
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
@@ -57,12 +64,16 @@ struct diag_ops __amode31_ref diag_amode31_ops = {
.diag26c = _diag26c_amode31,
.diag14 = _diag14_amode31,
.diag0c = _diag0c_amode31,
+ .diag8c = _diag8c_amode31,
.diag308_reset = _diag308_reset_amode31
};
static struct diag210 _diag210_tmp_amode31 __section(".amode31.data");
struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31;
+static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data");
+static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31;
+
static int show_diag_stat(struct seq_file *m, void *v)
{
struct diag_stat *stat;
@@ -140,20 +151,51 @@ void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr)
EXPORT_SYMBOL(diag_stat_inc_norecursion);
/*
+ * Diagnose 0c: Pseudo Timer
+ */
+void diag0c(struct hypfs_diag0c_entry *data)
+{
+ diag_stat_inc(DIAG_STAT_X00C);
+ diag_amode31_ops.diag0c(virt_to_phys(data));
+}
+
+/*
* Diagnose 14: Input spool file manipulation
+ *
+ * The subcode parameter determines the type of the first parameter rx.
+ * Currently used are the following 3 subcommands:
+ * 0x0: Read the Next Spool File Buffer (Data Record)
+ * 0x28: Position a Spool File to the Designated Record
+ * 0xfff: Retrieve Next File Descriptor
+ *
+ * For subcommands 0x0 and 0xfff, the value of the first parameter is
+ * a virtual address of a memory buffer and needs virtual to physical
+ * address translation. For other subcommands the rx parameter is not
+ * a virtual address.
*/
int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
{
diag_stat_inc(DIAG_STAT_X014);
+ switch (subcode) {
+ case 0x0:
+ case 0xfff:
+ rx = virt_to_phys((void *)rx);
+ break;
+ default:
+ /* Do nothing */
+ break;
+ }
return diag_amode31_ops.diag14(rx, ry1, subcode);
}
EXPORT_SYMBOL(diag14);
+#define DIAG204_BUSY_RC 8
+
static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
{
union register_pair rp = { .even = *subcode, .odd = size };
- asm volatile(
+ asm_inline volatile(
" diag %[addr],%[rp],0x204\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
@@ -162,12 +204,35 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad
return rp.odd;
}
+/**
+ * diag204() - Issue diagnose 204 call.
+ * @subcode: Subcode of diagnose 204 to be executed.
+ * @size: Size of area in pages which @area points to, if given.
+ * @addr: Vmalloc'ed memory area where the result is written to.
+ *
+ * Execute diagnose 204 with the given subcode and write the result to the
+ * memory area specified with @addr. For subcodes which do not write a
+ * result to memory both @size and @addr must be zero. If @addr is
+ * specified it must be page aligned and must have been allocated with
+ * vmalloc(). Conversion to real / physical addresses will be handled by
+ * this function if required.
+ */
int diag204(unsigned long subcode, unsigned long size, void *addr)
{
+ if (addr) {
+ if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
+ return -EINVAL;
+ }
+ if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
+ addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
diag_stat_inc(DIAG_STAT_X204);
size = __diag204(&subcode, size, addr);
- if (subcode)
- return -1;
+ if (subcode == DIAG204_BUSY_RC)
+ return -EBUSY;
+ else if (subcode)
+ return -EOPNOTSUPP;
return size;
}
EXPORT_SYMBOL(diag204);
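A hedged caller sketch (subcode and page count are placeholders): diag204() now requires a page-aligned vmalloc()'ed buffer and reports errors as -EBUSY or -EOPNOTSUPP instead of -1, so a caller could look roughly like this.

static void *example_diag204_query(unsigned long subcode, unsigned long pages)
{
	void *buf;
	int rc;

	/* vmalloc() memory is page aligned, as diag204() requires */
	buf = vmalloc(pages * PAGE_SIZE);
	if (!buf)
		return NULL;
	rc = diag204(subcode, pages, buf);
	if (rc < 0) {
		vfree(buf);
		return NULL;
	}
	return buf;
}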
@@ -194,17 +259,41 @@ int diag210(struct diag210 *addr)
}
EXPORT_SYMBOL(diag210);
+/*
+ * Diagnose 8C: Access 3270 Display Device Information
+ */
+int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
+{
+ static DEFINE_SPINLOCK(diag8c_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag8c_lock, flags);
+
+ diag_stat_inc(DIAG_STAT_X08C);
+ ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr));
+
+ *addr = *__diag8c_tmp_amode31;
+ spin_unlock_irqrestore(&diag8c_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag8c);
+
int diag224(void *ptr)
{
+ unsigned long addr = __pa(ptr);
int rc = -EOPNOTSUPP;
diag_stat_inc(DIAG_STAT_X224);
- asm volatile(
- " diag %1,%2,0x224\n"
- "0: lhi %0,0x0\n"
+ asm_inline volatile("\n"
+ " diag %[type],%[addr],0x224\n"
+ "0: lhi %[rc],0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+ : [rc] "+d" (rc)
+ , "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr)
+ : [type] "d" (0), [addr] "d" (addr));
return rc;
}
EXPORT_SYMBOL(diag224);
@@ -215,6 +304,21 @@ EXPORT_SYMBOL(diag224);
int diag26c(void *req, void *resp, enum diag26c_sc subcode)
{
diag_stat_inc(DIAG_STAT_X26C);
- return diag_amode31_ops.diag26c(req, resp, subcode);
+ return diag_amode31_ops.diag26c(virt_to_phys(req), virt_to_phys(resp), subcode);
}
EXPORT_SYMBOL(diag26c);
+
+int diag49c(unsigned long subcode)
+{
+ int cc;
+
+ diag_stat_inc(DIAG_STAT_X49C);
+ asm volatile(
+ " diag %[subcode],0,0x49c\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [subcode] "d" (subcode)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
+}
+EXPORT_SYMBOL(diag49c);
diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c
new file mode 100644
index 000000000000..d6a34454aa5a
--- /dev/null
+++ b/arch/s390/kernel/diag/diag310.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request memory topology information via diag0x310.
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+#define DIAG310_LEVELMIN 1
+#define DIAG310_LEVELMAX 6
+
+enum diag310_sc {
+ DIAG310_SUBC_0 = 0,
+ DIAG310_SUBC_1 = 1,
+ DIAG310_SUBC_4 = 4,
+ DIAG310_SUBC_5 = 5
+};
+
+enum diag310_retcode {
+ DIAG310_RET_SUCCESS = 0x0001,
+ DIAG310_RET_BUSY = 0x0101,
+ DIAG310_RET_OPNOTSUPP = 0x0102,
+ DIAG310_RET_SC4_INVAL = 0x0401,
+ DIAG310_RET_SC4_NODATA = 0x0402,
+ DIAG310_RET_SC5_INVAL = 0x0501,
+ DIAG310_RET_SC5_NODATA = 0x0502,
+ DIAG310_RET_SC5_ESIZE = 0x0503
+};
+
+union diag310_response {
+ u64 response;
+ struct {
+ u64 result : 32;
+ u64 : 16;
+ u64 rc : 16;
+ };
+};
+
+union diag310_req_subcode {
+ u64 subcode;
+ struct {
+ u64 : 48;
+ u64 st : 8;
+ u64 sc : 8;
+ };
+};
+
+union diag310_req_size {
+ u64 size;
+ struct {
+ u64 page_count : 32;
+ u64 : 32;
+ };
+};
+
+static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr, .odd = size };
+
+ diag_stat_inc(DIAG_STAT_X310);
+ asm volatile("diag %[rp],%[subcode],0x310\n"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static int diag310_result_to_errno(unsigned int result)
+{
+ switch (result) {
+ case DIAG310_RET_BUSY:
+ return -EBUSY;
+ case DIAG310_RET_OPNOTSUPP:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int diag310_get_subcode_mask(unsigned long *mask)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_0, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *mask = res.response;
+ return 0;
+}
+
+static int diag310_get_memtop_stride(unsigned long *stride)
+{
+ union diag310_response res;
+
+ res.response = diag310(DIAG310_SUBC_1, 0, NULL);
+ if (res.rc != DIAG310_RET_SUCCESS)
+ return diag310_result_to_errno(res.rc);
+ *stride = res.result;
+ return 0;
+}
+
+static int diag310_get_memtop_size(unsigned long *pages, unsigned long level)
+{
+ union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level };
+ union diag310_response res;
+
+ res.response = diag310(req.subcode, 0, NULL);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ *pages = res.result;
+ return 0;
+ case DIAG310_RET_SC4_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC4_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level)
+{
+ union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level };
+ union diag310_req_size req_size = { .page_count = pages };
+ union diag310_response res;
+
+ res.response = diag310(req_sc.subcode, req_size.size, buf);
+ switch (res.rc) {
+ case DIAG310_RET_SUCCESS:
+ return 0;
+ case DIAG310_RET_SC5_NODATA:
+ return -ENODATA;
+ case DIAG310_RET_SC5_ESIZE:
+ return -EOVERFLOW;
+ case DIAG310_RET_SC5_INVAL:
+ return -EINVAL;
+ default:
+ return diag310_result_to_errno(res.rc);
+ }
+}
+
+static int diag310_check_features(void)
+{
+ static int features_available;
+ unsigned long mask;
+ int rc;
+
+ if (READ_ONCE(features_available))
+ return 0;
+ if (!sclp.has_diag310)
+ return -EOPNOTSUPP;
+ rc = diag310_get_subcode_mask(&mask);
+ if (rc)
+ return rc;
+ if (!test_bit_inv(DIAG310_SUBC_1, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_4, &mask))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG310_SUBC_5, &mask))
+ return -EOPNOTSUPP;
+ WRITE_ONCE(features_available, 1);
+ return 0;
+}
+
+static int memtop_get_stride_len(unsigned long *res)
+{
+ static unsigned long memtop_stride;
+ unsigned long stride;
+ int rc;
+
+ stride = READ_ONCE(memtop_stride);
+ if (!stride) {
+ rc = diag310_get_memtop_stride(&stride);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_stride, stride);
+ }
+ *res = stride;
+ return 0;
+}
+
+static int memtop_get_page_count(unsigned long *res, unsigned long level)
+{
+ static unsigned long memtop_pages[DIAG310_LEVELMAX];
+ unsigned long pages;
+ int rc;
+
+ if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN)
+ return -EINVAL;
+ pages = READ_ONCE(memtop_pages[level - 1]);
+ if (!pages) {
+ rc = diag310_get_memtop_size(&pages, level);
+ if (rc)
+ return rc;
+ WRITE_ONCE(memtop_pages[level - 1], pages);
+ }
+ *res = pages;
+ return 0;
+}
+
+long diag310_memtop_stride(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long stride;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ rc = memtop_get_stride_len(&stride);
+ if (rc)
+ return rc;
+ if (put_user(stride, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_len(unsigned long arg)
+{
+ size_t __user *argp = (void __user *)arg;
+ unsigned long pages, level;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, argp))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ if (put_user(pages * PAGE_SIZE, argp))
+ return -EFAULT;
+ return 0;
+}
+
+long diag310_memtop_buf(unsigned long arg)
+{
+ struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg;
+ unsigned long level, pages, data_size;
+ u64 address;
+ void *buf;
+ int rc;
+
+ rc = diag310_check_features();
+ if (rc)
+ return rc;
+ if (get_user(level, &udata->nesting_lvl))
+ return -EFAULT;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ rc = memtop_get_page_count(&pages, level);
+ if (rc)
+ return rc;
+ data_size = pages * PAGE_SIZE;
+ buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ if (!buf)
+ return -ENOMEM;
+ rc = diag310_store_topology_map(buf, pages, level);
+ if (rc)
+ goto out;
+ if (copy_to_user((void __user *)address, buf, data_size))
+ rc = -EFAULT;
+out:
+ vfree(buf);
+ return rc;
+}
diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c
new file mode 100644
index 000000000000..7fa4c0b7eb6c
--- /dev/null
+++ b/arch/s390/kernel/diag/diag324.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Request power readings for resources in a computing environment via
+ * diag 0x324, which stores the power readings in the power information
+ * block (pib).
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#define pr_fmt(fmt) "diag324: " fmt
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
+#include <asm/diag.h>
+#include <asm/sclp.h>
+#include <asm/timex.h>
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+enum subcode {
+ DIAG324_SUBC_0 = 0,
+ DIAG324_SUBC_1 = 1,
+ DIAG324_SUBC_2 = 2,
+};
+
+enum retcode {
+ DIAG324_RET_SUCCESS = 0x0001,
+ DIAG324_RET_SUBC_NOTAVAIL = 0x0103,
+ DIAG324_RET_INSUFFICIENT_SIZE = 0x0104,
+ DIAG324_RET_READING_UNAVAILABLE = 0x0105,
+};
+
+union diag324_response {
+ u64 response;
+ struct {
+ u64 installed : 32;
+ u64 : 16;
+ u64 rc : 16;
+ } sc0;
+ struct {
+ u64 format : 16;
+ u64 : 16;
+ u64 pib_len : 16;
+ u64 rc : 16;
+ } sc1;
+ struct {
+ u64 : 48;
+ u64 rc : 16;
+ } sc2;
+};
+
+union diag324_request {
+ u64 request;
+ struct {
+ u64 : 32;
+ u64 allocated : 16;
+ u64 : 12;
+ u64 sc : 4;
+ } sc2;
+};
+
+struct pib {
+ u32 : 8;
+ u32 num : 8;
+ u32 len : 16;
+ u32 : 24;
+ u32 hlen : 8;
+ u64 : 64;
+ u64 intv;
+ u8 r[];
+} __packed;
+
+struct pibdata {
+ struct pib *pib;
+ ktime_t expire;
+ u64 sequence;
+ size_t len;
+ int rc;
+};
+
+static DEFINE_MUTEX(pibmutex);
+static struct pibdata pibdata;
+
+#define PIBWORK_DELAY (5 * NSEC_PER_SEC)
+
+static void pibwork_handler(struct work_struct *work);
+static DECLARE_DELAYED_WORK(pibwork, pibwork_handler);
+
+static unsigned long diag324(unsigned long subcode, void *addr)
+{
+ union register_pair rp = { .even = (unsigned long)addr };
+
+ diag_stat_inc(DIAG_STAT_X324);
+ asm volatile("diag %[rp],%[subcode],0x324\n"
+ : [rp] "+d" (rp.pair)
+ : [subcode] "d" (subcode)
+ : "memory");
+ return rp.odd;
+}
+
+static void pibwork_handler(struct work_struct *work)
+{
+ struct pibdata *data = &pibdata;
+ ktime_t timedout;
+
+ mutex_lock(&pibmutex);
+ timedout = ktime_add_ns(data->expire, PIBWORK_DELAY);
+ if (ktime_before(ktime_get(), timedout)) {
+ mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ goto out;
+ }
+ vfree(data->pib);
+ data->pib = NULL;
+out:
+ mutex_unlock(&pibmutex);
+}
+
+static void pib_update(struct pibdata *data)
+{
+ union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len };
+ union diag324_response res;
+ int rc;
+
+ memset(data->pib, 0, data->len);
+ res.response = diag324(req.request, data->pib);
+ switch (res.sc2.rc) {
+ case DIAG324_RET_SUCCESS:
+ rc = 0;
+ break;
+ case DIAG324_RET_SUBC_NOTAVAIL:
+ rc = -ENOENT;
+ break;
+ case DIAG324_RET_INSUFFICIENT_SIZE:
+ rc = -EMSGSIZE;
+ break;
+ case DIAG324_RET_READING_UNAVAILABLE:
+ rc = -EBUSY;
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ data->rc = rc;
+}
+
+long diag324_pibbuf(unsigned long arg)
+{
+ struct diag324_pib __user *udata = (struct diag324_pib __user *)arg;
+ struct pibdata *data = &pibdata;
+ static bool first = true;
+ u64 address;
+ int rc;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (get_user(address, &udata->address))
+ return -EFAULT;
+ mutex_lock(&pibmutex);
+ rc = -ENOMEM;
+ if (!data->pib)
+ data->pib = vmalloc(data->len);
+ if (!data->pib)
+ goto out;
+ if (first || ktime_after(ktime_get(), data->expire)) {
+ pib_update(data);
+ data->sequence++;
+ data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv));
+ mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY));
+ first = false;
+ }
+ rc = data->rc;
+ if (rc != 0 && rc != -EBUSY)
+ goto out;
+ rc = copy_to_user((void __user *)address, data->pib, data->pib->len);
+ rc |= put_user(data->sequence, &udata->sequence);
+ if (rc)
+ rc = -EFAULT;
+out:
+ mutex_unlock(&pibmutex);
+ return rc;
+}
+
+long diag324_piblen(unsigned long arg)
+{
+ struct pibdata *data = &pibdata;
+
+ if (!data->len)
+ return -EOPNOTSUPP;
+ if (put_user(data->len, (size_t __user *)arg))
+ return -EFAULT;
+ return 0;
+}
+
+static int __init diag324_init(void)
+{
+ union diag324_response res;
+ unsigned long installed;
+
+ if (!sclp.has_diag324)
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_0, NULL);
+ if (res.sc0.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ installed = res.response;
+ if (!test_bit_inv(DIAG324_SUBC_1, &installed))
+ return -EOPNOTSUPP;
+ if (!test_bit_inv(DIAG324_SUBC_2, &installed))
+ return -EOPNOTSUPP;
+ res.response = diag324(DIAG324_SUBC_1, NULL);
+ if (res.sc1.rc != DIAG324_RET_SUCCESS)
+ return -EOPNOTSUPP;
+ pibdata.len = res.sc1.pib_len;
+ return 0;
+}
+device_initcall(diag324_init);
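A user-space sketch of the power information block interface, under the assumption that the uapi <asm/diag.h> header provides DIAG324_GET_PIBLEN, DIAG324_GET_PIBBUF and a struct diag324_pib with the address/sequence fields used above, and that the /dev/diag misc device registered later in this series is present:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/diag.h>

int main(void)
{
	struct diag324_pib req = { 0 };
	size_t len;
	void *buf;
	int fd, rc = 1;

	fd = open("/dev/diag", O_RDONLY);
	if (fd < 0)
		return 1;
	/* query the required buffer size, then fetch the readings */
	if (ioctl(fd, DIAG324_GET_PIBLEN, &len))
		goto out;
	buf = malloc(len);
	if (!buf)
		goto out;
	req.address = (unsigned long)buf;
	if (ioctl(fd, DIAG324_GET_PIBBUF, &req) == 0) {
		printf("pib sequence %llu, buffer length %zu\n",
		       (unsigned long long)req.sequence, len);
		rc = 0;
	}
	free(buf);
out:
	close(fd);
	return rc;
}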
diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h
new file mode 100644
index 000000000000..7080be946785
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_ioctl.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _DIAG_IOCTL_H
+#define _DIAG_IOCTL_H
+
+#include <linux/types.h>
+
+long diag324_pibbuf(unsigned long arg);
+long diag324_piblen(unsigned long arg);
+
+long diag310_memtop_stride(unsigned long arg);
+long diag310_memtop_len(unsigned long arg);
+long diag310_memtop_buf(unsigned long arg);
+
+#endif /* _DIAG_IOCTL_H */
diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c
new file mode 100644
index 000000000000..efffe02ea02e
--- /dev/null
+++ b/arch/s390/kernel/diag/diag_misc.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide diagnose information via misc device /dev/diag.
+ *
+ * Copyright IBM Corp. 2024
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include <uapi/asm/diag.h>
+#include "diag_ioctl.h"
+
+static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ long rc;
+
+ switch (cmd) {
+ case DIAG324_GET_PIBLEN:
+ rc = diag324_piblen(arg);
+ break;
+ case DIAG324_GET_PIBBUF:
+ rc = diag324_pibbuf(arg);
+ break;
+ case DIAG310_GET_STRIDE:
+ rc = diag310_memtop_stride(arg);
+ break;
+ case DIAG310_GET_MEMTOPLEN:
+ rc = diag310_memtop_len(arg);
+ break;
+ case DIAG310_GET_MEMTOPBUF:
+ rc = diag310_memtop_buf(arg);
+ break;
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
+ }
+ return rc;
+}
+
+static const struct file_operations fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .unlocked_ioctl = diag_ioctl,
+};
+
+static struct miscdevice diagdev = {
+ .name = "diag",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &fops,
+ .mode = 0444,
+};
+
+static int diag_init(void)
+{
+ return misc_register(&diagdev);
+}
+
+device_initcall(diag_init);
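A short user-space sketch for the memory topology ioctls (assuming DIAG310_GET_STRIDE from the uapi <asm/diag.h> header takes a size_t argument, as the kernel side above suggests): query the stride via /dev/diag.

#include <stdio.h>
#include <stddef.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/diag.h>

int main(void)
{
	size_t stride;
	int fd;

	fd = open("/dev/diag", O_RDONLY);
	if (fd < 0)
		return 1;
	if (ioctl(fd, DIAG310_GET_STRIDE, &stride) == 0)
		printf("memory topology stride: %zu\n", stride);
	close(fd);
	return 0;
}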
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 90bbb4ea1d08..94eb8168ea44 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -24,8 +24,8 @@
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include <linux/io.h>
#include <asm/dis.h>
-#include <asm/io.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
@@ -122,6 +122,7 @@ enum {
U8_32, /* 8 bit unsigned value starting at 32 */
U12_16, /* 12 bit unsigned value starting at 16 */
U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_20, /* 16 bit unsigned value starting at 20 */
U16_32, /* 16 bit unsigned value starting at 32 */
U32_16, /* 32 bit unsigned value starting at 16 */
VX_12, /* Vector index register starting at position 12 */
@@ -184,6 +185,7 @@ static const struct s390_operand operands[] = {
[U8_32] = { 8, 32, 0 },
[U12_16] = { 12, 16, 0 },
[U16_16] = { 16, 16, 0 },
+ [U16_20] = { 16, 20, 0 },
[U16_32] = { 16, 32, 0 },
[U32_16] = { 32, 16, 0 },
[VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
@@ -257,7 +259,6 @@ static const unsigned char formats[][6] = {
[INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 },
[INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 },
- [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 },
[INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 },
[INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 },
@@ -300,14 +301,17 @@ static const unsigned char formats[][6] = {
[INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 },
[INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 },
[INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 },
+ [INSTR_VRI_VV0UU] = { V_8, V_12, U8_28, U4_24, 0, 0 },
[INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 },
[INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 },
[INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 },
[INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 },
[INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 },
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
- [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
+ [INSTR_VRI_VVV0UV] = { V_8, V_12, V_16, V_32, U8_24, 0 },
+ [INSTR_VRR_0V0U] = { V_12, U16_20, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
+ [INSTR_VRR_0VVU] = { V_12, V_16, U16_20, 0, 0, 0 },
[INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
@@ -455,21 +459,21 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
if (separator)
ptr += sprintf(ptr, "%c", separator);
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%r%u", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%f%u", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%a%u", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
+ ptr += sprintf(ptr, "%%c%u", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%v%i", value);
+ ptr += sprintf(ptr, "%%v%u", value);
else if (operand->flags & OPERAND_PCREL) {
void *pcrel = (void *)((int)value + addr);
ptr += sprintf(ptr, "%px", pcrel);
} else if (operand->flags & OPERAND_SIGNED)
- ptr += sprintf(ptr, "%i", value);
+ ptr += sprintf(ptr, "%i", (int)value);
else
ptr += sprintf(ptr, "%u", value);
if (operand->flags & OPERAND_DISP)
@@ -516,7 +520,7 @@ void show_code(struct pt_regs *regs)
if (copy_from_regs(regs, code + end, (void *)addr, 2))
break;
}
- /* Code snapshot useable ? */
+ /* Code snapshot usable ? */
if ((regs->psw.addr & 1) || start >= end) {
printk("%s Code: Bad PSW.\n", mode);
return;
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 1e3233eb510a..dd410962ecbe 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -17,6 +17,7 @@
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
+#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/debug.h>
#include <asm/dis.h>
@@ -41,60 +42,50 @@ const char *stack_type_name(enum stack_type type)
EXPORT_SYMBOL_GPL(stack_type_name);
static inline bool in_stack(unsigned long sp, struct stack_info *info,
- enum stack_type type, unsigned long low,
- unsigned long high)
+ enum stack_type type, unsigned long stack)
{
- if (sp < low || sp >= high)
+ if (sp < stack || sp >= stack + THREAD_SIZE)
return false;
info->type = type;
- info->begin = low;
- info->end = high;
+ info->begin = stack;
+ info->end = stack + THREAD_SIZE;
return true;
}
static bool in_task_stack(unsigned long sp, struct task_struct *task,
struct stack_info *info)
{
- unsigned long stack;
+ unsigned long stack = (unsigned long)task_stack_page(task);
- stack = (unsigned long) task_stack_page(task);
- return in_stack(sp, info, STACK_TYPE_TASK, stack, stack + THREAD_SIZE);
+ return in_stack(sp, info, STACK_TYPE_TASK, stack);
}
static bool in_irq_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long frame_size, top;
+ unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET;
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- top = S390_lowcore.async_stack + frame_size;
- return in_stack(sp, info, STACK_TYPE_IRQ, top - THREAD_SIZE, top);
+ return in_stack(sp, info, STACK_TYPE_IRQ, stack);
}
static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long frame_size, top;
+ unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET;
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- top = S390_lowcore.nodat_stack + frame_size;
- return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top);
+ return in_stack(sp, info, STACK_TYPE_NODAT, stack);
}
static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long frame_size, top;
+ unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET;
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- top = S390_lowcore.mcck_stack + frame_size;
- return in_stack(sp, info, STACK_TYPE_MCCK, top - THREAD_SIZE, top);
+ return in_stack(sp, info, STACK_TYPE_MCCK, stack);
}
static bool in_restart_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long frame_size, top;
+ unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET;
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- top = S390_lowcore.restart_stack + frame_size;
- return in_stack(sp, info, STACK_TYPE_RESTART, top - THREAD_SIZE, top);
+ return in_stack(sp, info, STACK_TYPE_RESTART, stack);
}
int get_stack_info(unsigned long sp, struct task_struct *task,
@@ -152,7 +143,13 @@ void show_stack(struct task_struct *task, unsigned long *stack,
static void show_last_breaking_event(struct pt_regs *regs)
{
printk("Last Breaking-Event-Address:\n");
- printk(" [<%016lx>] %pSR\n", regs->last_break, (void *)regs->last_break);
+ printk(" [<%016lx>] ", regs->last_break);
+ if (user_mode(regs)) {
+ print_vma_addr(KERN_CONT, regs->last_break);
+ pr_cont("\n");
+ } else {
+ pr_cont("%pSR\n", (void *)regs->last_break);
+ }
}
void show_registers(struct pt_regs *regs)
@@ -202,13 +199,8 @@ void __noreturn die(struct pt_regs *regs, const char *str)
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
- printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+ printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff,
regs->int_code >> 17, ++die_counter);
-#ifdef CONFIG_PREEMPT
- pr_cont("PREEMPT ");
-#elif defined(CONFIG_PREEMPT_RT)
- pr_cont("PREEMPT_RT ");
-#endif
pr_cont("SMP ");
if (debug_pagealloc_enabled())
pr_cont("DEBUG_PAGEALLOC");
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 432c8c987256..54cf0923050f 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -7,6 +7,8 @@
#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/sched/debug.h>
+#include <linux/cpufeature.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -18,8 +20,13 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <asm/asm-extable.h>
+#include <linux/memblock.h>
+#include <asm/access-regs.h>
+#include <asm/asm-offsets.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
+#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
@@ -30,24 +37,36 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
-#include <asm/switch_to.h>
#include "entry.h"
-int __bootdata(is_full_image);
+#define __decompressor_handled_param(func, param) \
+static int __init ignore_decompressor_param_##func(char *s) \
+{ \
+ return 0; \
+} \
+early_param(#param, ignore_decompressor_param_##func)
+
+#define decompressor_handled_param(param) __decompressor_handled_param(param, param)
+
+decompressor_handled_param(mem);
+decompressor_handled_param(vmalloc);
+decompressor_handled_param(dfltcc);
+decompressor_handled_param(facilities);
+decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
+decompressor_handled_param(bootdebug);
+__decompressor_handled_param(debug_alternative, debug-alternative);
+#if IS_ENABLED(CONFIG_KVM)
+decompressor_handled_param(prot_virt);
+#endif
-static void __init reset_tod_clock(void)
+static void __init kasan_early_init(void)
{
- union tod_clock clk;
-
- if (store_tod_clock_ext_cc(&clk) == 0)
- return;
- /* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk))
- disabled_wait();
-
- memset(&tod_clock_base, 0, sizeof(tod_clock_base));
- tod_clock_base.tod = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+#ifdef CONFIG_KASAN
+ init_task.kasan_depth = 0;
+ pr_info("KernelAddressSanitizer initialized\n");
+#endif
}
/*
@@ -68,26 +87,6 @@ static noinline __init void init_kernel_storage_key(void)
static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
-static noinline __init void detect_machine_type(void)
-{
- struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
-
- /* Check current-configuration-level */
- if (stsi(NULL, 0, 0, 0) <= 2) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
- return;
- }
- /* Get virtual-machine cpu information. */
- if (stsi(vmms, 3, 2, 2) || !vmms->count)
- return;
-
- /* Detect known hypervisors */
- if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
- else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
- S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
-}
-
/* Remove leading, trailing and double whitespace. */
static inline void strim_all(char *str)
{
@@ -128,9 +127,9 @@ static noinline __init void setup_arch_string(void)
strim_all(hvstr);
} else {
sprintf(hvstr, "%s",
- MACHINE_IS_LPAR ? "LPAR" :
- MACHINE_IS_VM ? "z/VM" :
- MACHINE_IS_KVM ? "KVM" : "unknown");
+ machine_is_lpar() ? "LPAR" :
+ machine_is_vm() ? "z/VM" :
+ machine_is_kvm() ? "KVM" : "unknown");
}
dump_stack_set_arch_desc("%s (%s)", mstr, hvstr);
}
@@ -139,9 +138,8 @@ static __init void setup_topology(void)
{
int max_mnest;
- if (!test_facility(11))
+ if (!cpu_has_topology())
return;
- S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
for (max_mnest = 6; max_mnest > 1; max_mnest--) {
if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
@@ -149,103 +147,58 @@ static __init void setup_topology(void)
topology_max_mnest = max_mnest;
}
-void __do_early_pgm_check(struct pt_regs *regs)
+void __init __do_early_pgm_check(struct pt_regs *regs)
{
- if (!fixup_exception(regs))
- disabled_wait();
+ struct lowcore *lc = get_lowcore();
+ unsigned long ip;
+
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = lc->trans_exc_code;
+ ip = __rewind_psw(regs->psw, regs->int_code >> 16);
+
+ /* Monitor Event? Might be a warning */
+ if ((regs->int_code & PGM_INT_CODE_MASK) == 0x40) {
+ if (report_bug(ip, regs) == BUG_TRAP_TYPE_WARN)
+ return;
+ }
+ if (fixup_exception(regs))
+ return;
+ /*
+ * Unhandled exception - system cannot continue but try to get some
+ * helpful messages to the console. Use early_printk() to print
+ * some basic information in case it is too early for printk().
+ */
+ register_early_console();
+ early_printk("PANIC: early exception %04x PSW: %016lx %016lx\n",
+ regs->int_code & 0xffff, regs->psw.mask, regs->psw.addr);
+ show_regs(regs);
+ disabled_wait();
}
static noinline __init void setup_lowcore_early(void)
{
+ struct lowcore *lc = get_lowcore();
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
- psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- if (IS_ENABLED(CONFIG_KASAN))
- psw.mask |= PSW_MASK_DAT;
- S390_lowcore.program_new_psw = psw;
- S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
-}
-
-static noinline __init void setup_facility_list(void)
-{
- memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
- if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
- __clear_facility(82, alt_stfle_fac_list);
-}
-
-static __init void detect_diag9c(void)
-{
- unsigned int cpu_address;
- int rc;
-
- cpu_address = stap();
- diag_stat_inc(DIAG_STAT_X09C);
- asm volatile(
- " diag %2,0,0x9c\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
-}
-
-static __init void detect_machine_facilities(void)
-{
- if (test_facility(8)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
- __ctl_set_bit(0, 23);
- }
- if (test_facility(78))
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
- if (test_facility(3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(50) && test_facility(73)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
- __ctl_set_bit(0, 55);
- }
- if (test_facility(51))
- S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
- if (test_facility(129)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
- __ctl_set_bit(0, 17);
- }
- if (test_facility(130) && !noexec_disabled) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
- __ctl_set_bit(0, 20);
- }
- if (test_facility(133))
- S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
- if (test_facility(139) && (tod_clock_base.tod >> 63)) {
- /* Enabled signed clock comparator comparisons */
- S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
- clock_comparator_max = -1ULL >> 1;
- __ctl_set_bit(0, 53);
- }
- if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
- /* the control bit is set during PCI initialization */
- }
+ psw.mask = PSW_KERNEL_BITS;
+ lc->program_new_psw = psw;
+ lc->preempt_count = INIT_PREEMPT_COUNT;
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
}
static inline void save_vector_registers(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (test_facility(129))
+ if (cpu_has_vx())
save_vx_regs(boot_cpu_vector_save_area);
#endif
}
-static inline void setup_control_registers(void)
+static inline void setup_low_address_protection(void)
{
- unsigned long reg;
-
- __ctl_store(reg, 0, 0);
- reg |= CR0_LOW_ADDRESS_PROTECTION;
- reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
- reg |= CR0_EXTERNAL_CALL_SUBMASK;
- __ctl_load(reg, 0, 0);
+ system_ctl_set_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
}
static inline void setup_access_registers(void)
@@ -255,30 +208,11 @@ static inline void setup_access_registers(void)
restore_access_regs(acrs);
}
-static int __init disable_vector_extension(char *str)
-{
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
- __ctl_clear_bit(0, 17);
- return 0;
-}
-early_param("novx", disable_vector_extension);
-
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
/* copy arch command line */
- strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
-}
-
-static void __init check_image_bootable(void)
-{
- if (is_full_image)
- return;
-
- sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
- sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n");
- sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n");
- disabled_wait();
+ strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
}
static void __init sort_amode31_extable(void)
@@ -288,24 +222,18 @@ static void __init sort_amode31_extable(void)
void __init startup_init(void)
{
- sclp_early_adjust_va();
- reset_tod_clock();
- check_image_bootable();
+ kasan_early_init();
time_early_init();
init_kernel_storage_key();
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
- setup_facility_list();
- detect_machine_type();
setup_arch_string();
setup_boot_command_line();
- detect_diag9c();
- detect_machine_facilities();
save_vector_registers();
setup_topology();
sclp_early_detect();
- setup_control_registers();
+ setup_low_address_protection();
setup_access_registers();
lockdep_on();
}
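
The decompressor_handled_param() block near the top of early.c registers no-op early_param() handlers so that parameters the boot decompressor has already consumed are not reported as unknown later. The macro is two-level because the handler name must be a valid C identifier while the parameter string may contain a dash (debug-alternative). A compilable sketch of the idiom, with a dummy registration macro standing in for early_param():

#include <stdio.h>

struct param_reg {
	const char *name;
	int (*setup)(char *);
};

/* Stand-in for early_param(): just records the name/handler pair. */
#define register_param(name, fn) \
	static const struct param_reg reg_##fn = { name, fn }

/*
 * Two-level macro: "func" must be a valid C identifier for token pasting,
 * while "param" may contain characters such as '-' because it is only
 * stringized.
 */
#define __handled_param(func, param)			\
static int ignore_param_##func(char *s)			\
{							\
	(void)s;					\
	return 0;	/* accept and drop the value */	\
}							\
register_param(#param, ignore_param_##func)

#define handled_param(param) __handled_param(param, param)

handled_param(mem);
__handled_param(debug_alternative, debug-alternative);

int main(void)
{
	printf("%s\n", reg_ignore_param_mem.name);		/* mem */
	printf("%s\n", reg_ignore_param_debug_alternative.name);	/* debug-alternative */
	return 0;
}
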
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index d9d53f44008a..cefe020a3be3 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -6,6 +6,7 @@
#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <asm/setup.h>
#include <asm/sclp.h>
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
@@ -20,6 +21,16 @@ static struct console sclp_early_console = {
.index = -1,
};
+void __init register_early_console(void)
+{
+ if (early_console)
+ return;
+ if (!sclp.has_linemode && !sclp.has_vt220)
+ return;
+ early_console = &sclp_early_console;
+ register_console(early_console);
+}
+
static int __init setup_early_printk(char *buf)
{
if (early_console)
@@ -27,10 +38,7 @@ static int __init setup_early_printk(char *buf)
/* Accept only "earlyprintk" and "earlyprintk=sclp" */
if (buf && !str_has_prefix(buf, "sclp"))
return 0;
- if (!sclp.has_linemode && !sclp.has_vt220)
- return 0;
- early_console = &sclp_early_console;
- register_console(early_console);
+ register_early_console();
return 0;
}
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S
deleted file mode 100644
index f521c6da37b8..000000000000
--- a/arch/s390/kernel/earlypgm.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 2006, 2007
- * Author(s): Michael Holzheu <holzheu@de.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-ENTRY(early_pgm_check_handler)
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- lgr %r2,%r11
- brasl %r14,__do_early_pgm_check
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
- lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- lpswe __LC_RETURN_PSW
-ENDPROC(early_pgm_check_handler)
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
index 7f8246c9be08..0e51fa537262 100644
--- a/arch/s390/kernel/ebcdic.c
+++ b/arch/s390/kernel/ebcdic.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * ECBDIC -> ASCII, ASCII -> ECBDIC,
+ * EBCDIC -> ASCII, ASCII -> EBCDIC,
* upper to lower case (EBCDIC) conversion tables.
*
* S390 version
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index d2a1f2f4f5b8..0f00f4b06d51 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -8,10 +8,11 @@
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*/
+#include <linux/export.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
@@ -23,62 +24,51 @@
#include <asm/page.h>
#include <asm/sigp.h>
#include <asm/irq.h>
-#include <asm/vx-insn.h>
+#include <asm/fpu-insn.h>
#include <asm/setup.h>
#include <asm/nmi.h>
-#include <asm/export.h>
#include <asm/nospec-insn.h>
-
-STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER
-STACK_SIZE = 1 << STACK_SHIFT
-STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+#include <asm/lowcore.h>
+#include <asm/machine.h>
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
.endm
.macro LBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
- .endm
-
- .macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
- .macro MBEAR reg
- ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ .macro LPSWEY address, lpswe
+ ALTERNATIVE_2 "b \lpswe;nopr", \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \
+ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
+ ALT_FEATURE(MFEATURE_LOWCORE)
.endm
- .macro CHECK_STACK savearea
-#ifdef CONFIG_CHECK_STACK
- tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
- lghi %r14,\savearea
- jz stack_overflow
-#endif
+ .macro MBEAR reg, lowcore
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+ ALT_FACILITY(193)
.endm
- .macro CHECK_VMAP_STACK savearea,oklabel
-#ifdef CONFIG_VMAP_STACK
+ .macro CHECK_VMAP_STACK savearea, lowcore, oklabel
lgr %r14,%r15
- nill %r14,0x10000 - STACK_SIZE
- oill %r14,STACK_INIT
- clg %r14,__LC_KERNEL_STACK
+ nill %r14,0x10000 - THREAD_SIZE
+ oill %r14,STACK_INIT_OFFSET
+ clg %r14,__LC_KERNEL_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_ASYNC_STACK
+ clg %r14,__LC_ASYNC_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_MCCK_STACK
+ clg %r14,__LC_MCCK_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_NODAT_STACK
+ clg %r14,__LC_NODAT_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_RESTART_STACK
+ clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
- lghi %r14,\savearea
- j stack_overflow
-#else
- j \oklabel
-#endif
+ la %r14,\savearea(\lowcore)
+ j stack_invalid
.endm
/*
@@ -104,151 +94,108 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
.endm
.macro BPON
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
- "j .+12; nop; nop", 82
+ "j .+12; nop; nop", ALT_SPEC(82)
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
- "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
- .endm
-
- /*
- * The CHKSTG macro jumps to the provided label in case the
- * machine check interruption code reports one of unrecoverable
- * storage errors:
- * - Storage error uncorrected
- * - Storage key error uncorrected
- * - Storage degradation with Failing-storage-address validity
- */
- .macro CHKSTG errlabel
- TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR)
- jnz \errlabel
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD
- jz .Loklabel\@
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR
- jnz \errlabel
-.Loklabel\@:
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
#if IS_ENABLED(CONFIG_KVM)
- /*
- * The OUTSIDE macro jumps to the provided label in case the value
- * in the provided register is outside of the provided range. The
- * macro is useful for checking whether a PSW stored in a register
- * pair points inside or outside of a block of instructions.
- * @reg: register to check
- * @start: start of the range
- * @end: end of the range
- * @outside_label: jump here if @reg is outside of [@start..@end)
- */
- .macro OUTSIDE reg,start,end,outside_label
- lgr %r14,\reg
- larl %r13,\start
- slgr %r14,%r13
-#ifdef CONFIG_AS_IS_LLVM
- clgfrl %r14,.Lrange_size\@
-#else
- clgfi %r14,\end - \start
-#endif
- jhe \outside_label
-#ifdef CONFIG_AS_IS_LLVM
- .section .rodata, "a"
- .align 4
-.Lrange_size\@:
- .long \end - \start
- .previous
-#endif
- .endm
-
- .macro SIEEXIT
- lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
- ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
+ .macro SIEEXIT sie_control,lowcore
+ lg %r9,\sie_control # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE(\lowcore) # load primary asce
+ lg %r9,__LC_CURRENT(\lowcore)
+ mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
.endm
#endif
+ .macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ brasl %r14,stackleak_erase_on_task_stack
+#endif
+ .endm
+
GEN_BR_THUNK %r14
.section .kprobes.text, "ax"
.Ldummy:
/*
- * This nop exists only in order to avoid that __bpon starts at
- * the beginning of the kprobes text section. In that case we would
- * have several symbols at the same address. E.g. objdump would take
- * an arbitrary symbol name when disassembling this code.
- * With the added nop in between the __bpon symbol is unique
- * again.
+ * The following nop exists only in order to avoid that the next
+ * symbol starts at the beginning of the kprobes text section.
+ * In that case there would be several symbols at the same address.
+ * E.g. objdump would take an arbitrary symbol when disassembling
+ * the code.
+ * With the added nop in between this cannot happen.
*/
nop 0
-ENTRY(__bpon)
- .globl __bpon
- BPON
- BR_EX %r14
-ENDPROC(__bpon)
-
/*
- * Scheduler resume function, called by switch_to
- * gpr2 = (task_struct *) prev
- * gpr3 = (task_struct *) next
+ * Scheduler resume function, called by __switch_to
+ * gpr2 = (task_struct *)prev
+ * gpr3 = (task_struct *)next
* Returns:
* gpr2 = prev
*/
-ENTRY(__switch_to)
+SYM_FUNC_START(__switch_to_asm)
stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
lghi %r4,__TASK_stack
lghi %r1,__TASK_thread
- llill %r5,STACK_INIT
+ llill %r5,STACK_INIT_OFFSET
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lg %r15,0(%r4,%r3) # start of kernel stack of next
agr %r15,%r5 # end of kernel stack of next
- stg %r3,__LC_CURRENT # store task struct of next
- stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ GET_LC %r13
+ stg %r3,__LC_CURRENT(%r13) # store task struct of next
+ stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
aghi %r3,__TASK_pid
- mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
+ mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
-ENDPROC(__switch_to)
+SYM_FUNC_END(__switch_to_asm)
#if IS_ENABLED(CONFIG_KVM)
/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
+ * __sie64a calling convention:
+ * %r2 pointer to sie control block phys
+ * %r3 pointer to sie control block virt
+ * %r4 guest register save area
+ * %r5 guest asce
*/
-ENTRY(sie64a)
+SYM_FUNC_START(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- lg %r12,__LC_CURRENT
- stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer
- stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ GET_LC %r13
+ lg %r14,__LC_CURRENT(%r13)
+ stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
+ stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
+ stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
- mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
- lmg %r0,%r13,0(%r3) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz .Lsie_gmap
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-.Lsie_gmap:
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
+ lmg %r0,%r13,0(%r4) # load guest gprs 0-13
+ mvi __TI_sie(%r14),1
+ lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
tm __SIE_PROG20+3(%r14),3 # last exit...
jnz .Lsie_skip
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- jo .Lsie_skip # exit if fp/vx regs changed
- BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr
+ BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
.Lsie_entry:
sie 0(%r14)
# Let the next instruction be NOP to avoid triggering a machine check
@@ -256,24 +203,15 @@ ENTRY(sie64a)
nopr 7
.Lsie_leave:
BPOFF
- BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
.Lsie_skip:
+ lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
-.Lsie_done:
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
-# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
-# Other instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
-.Lrewind_pad6:
- nopr 7
-.Lrewind_pad4:
- nopr 7
-.Lrewind_pad2:
- nopr 7
- .globl sie_exit
-sie_exit:
+ GET_LC %r14
+ lctlg %c1,%c1,__LC_USER_ASCE(%r14) # load primary asce
+ lg %r14,__LC_CURRENT(%r14)
+ mvi __TI_sie(%r14),0
+SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
xgr %r0,%r0 # clear guest registers to
@@ -284,17 +222,8 @@ sie_exit:
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
BR_EX %r14
-.Lsie_fault:
- lghi %r14,-EFAULT
- stg %r14,__SF_SIE_REASON(%r15) # set exit reason code
- j sie_exit
-
- EX_TABLE(.Lrewind_pad6,.Lsie_fault)
- EX_TABLE(.Lrewind_pad4,.Lsie_fault)
- EX_TABLE(.Lrewind_pad2,.Lsie_fault)
- EX_TABLE(sie_exit,.Lsie_fault)
-ENDPROC(sie64a)
-EXPORT_SYMBOL(sie64a)
+SYM_FUNC_END(__sie64a)
+EXPORT_SYMBOL(__sie64a)
EXPORT_SYMBOL(sie_exit)
#endif
@@ -303,19 +232,17 @@ EXPORT_SYMBOL(sie_exit)
* are entered with interrupts disabled.
*/
-ENTRY(system_call)
- stpt __LC_SYS_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+SYM_CODE_START(system_call)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
lghi %r14,0
.Lsysc_per:
- STBEAR __LC_LAST_BREAK
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r12,__LC_CURRENT
- lg %r15,__LC_KERNEL_STACK
+ STBEAR __LC_LAST_BREAK(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
# clear user controlled register to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -328,74 +255,71 @@ ENTRY(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
- mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
- MBEAR %r2
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
+ MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
- BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+ STACKLEAK_ERASE
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
-ENDPROC(system_call)
+SYM_CODE_END(system_call)
#
# a new process exits the kernel with ret_from_fork
#
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
lgr %r3,%r11
brasl %r14,__ret_from_fork
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
- BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+ STACKLEAK_ERASE
+ GET_LC %r13
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
-ENDPROC(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
/*
* Program check handler routine
*/
-ENTRY(pgm_check_handler)
- stpt __LC_SYS_ENTER_TIMER
+SYM_CODE_START(pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
- lg %r12,__LC_CURRENT
- lghi %r10,0
- lmg %r8,%r9,__LC_PGM_OLD_PSW
+ lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
+ xgr %r10,%r10
tmhh %r8,0x0001 # coming from user space?
- jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- j 3f # -> fault in user space
-.Lpgm_skip_asce:
+ jo 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
- # cleanup critical section for program checks in sie64a
- OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f
- SIEEXIT
+ lg %r11,__LC_CURRENT(%r13)
+ tm __TI_sie(%r11),0xff
+ jz 1f
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
lghi %r10,_PIF_GUEST_FAULT
#endif
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 2f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
+ tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA_SYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- # CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-3: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
- lg %r15,__LC_KERNEL_STACK
+2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ # CHECK_VMAP_STACK branches to stack_invalid or 4f
+ CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f
+3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
stg %r10,__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
stmg %r8,%r9,__PT_PSW(%r11)
-
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -404,15 +328,16 @@ ENTRY(pgm_check_handler)
xgr %r5,%r5
xgr %r6,%r6
xgr %r7,%r7
+ xgr %r12,%r12
lgr %r2,%r11
brasl %r14,__do_pgm_check
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
- lctlg %c1,%c1,__LC_USER_ASCE
- BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
- stpt __LC_EXIT_TIMER
+ STACKLEAK_ERASE
+ BPON
+ stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -421,39 +346,38 @@ ENTRY(pgm_check_handler)
# single stepped system call
#
.Lpgm_svcper:
- mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
larl %r14,.Lsysc_per
- stg %r14,__LC_RETURN_PSW+8
+ stg %r14,__LC_RETURN_PSW+8(%r13)
lghi %r14,1
- LBEAR __LC_PGM_LAST_BREAK
+ LBEAR __LC_PGM_LAST_BREAK(%r13)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
-ENDPROC(pgm_check_handler)
+SYM_CODE_END(pgm_check_handler)
/*
* Interrupt handler macro used for external and IO interrupts.
*/
.macro INT_HANDLER name,lc_old_psw,handler
-ENTRY(\name)
- stckf __LC_INT_CLOCK
- stpt __LC_SYS_ENTER_TIMER
- STBEAR __LC_LAST_BREAK
+SYM_CODE_START(\name)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ stckf __LC_INT_CLOCK(%r13)
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ STBEAR __LC_LAST_BREAK(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lg %r12,__LC_CURRENT
- lmg %r8,%r9,\lc_old_psw
+ lmg %r8,%r9,\lc_old_psw(%r13)
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
- OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f
- BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
- SIEEXIT
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
+ jz 0f
+ BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA_ASYNC
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+1: lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -467,114 +391,85 @@ ENTRY(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- MBEAR %r11
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
jno 2f
- lctlg %c1,%c1,__LC_USER_ASCE
- BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
- stpt __LC_EXIT_TIMER
+ STACKLEAK_ERASE
+ BPON
+ stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
-ENDPROC(\name)
+SYM_CODE_END(\name)
.endm
+ .section .irqentry.text, "ax"
+
INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq
INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
-/*
- * Load idle PSW.
- */
-ENTRY(psw_idle)
- stg %r14,(__SF_GPRS+8*8)(%r15)
- stg %r3,__SF_EMPTY(%r15)
- larl %r1,psw_idle_exit
- stg %r1,__SF_EMPTY+8(%r15)
- larl %r1,smp_cpu_mtid
- llgf %r1,0(%r1)
- ltgr %r1,%r1
- jz .Lpsw_idle_stcctm
- .insn rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2)
-.Lpsw_idle_stcctm:
- oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
- BPON
- stckf __CLOCK_IDLE_ENTER(%r2)
- stpt __TIMER_IDLE_ENTER(%r2)
- lpswe __SF_EMPTY(%r15)
-.globl psw_idle_exit
-psw_idle_exit:
- BR_EX %r14
-ENDPROC(psw_idle)
+ .section .kprobes.text, "ax"
/*
* Machine check handler routines
*/
-ENTRY(mcck_int_handler)
- stckf __LC_MCCK_CLOCK
+SYM_CODE_START(mcck_int_handler)
BPOFF
- la %r1,4095 # validate r1
- spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
- LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear
- lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
- lg %r12,__LC_CURRENT
- lmg %r8,%r9,__LC_MCK_OLD_PSW
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+ GET_LC %r13
+ lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
- la %r14,4095
- lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r14) # validate ctl regs
ptlb
- lghi %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+ lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
- la %r14,__LC_SYS_ENTER_TIMER
- clc 0(8,%r14),__LC_EXIT_TIMER
+ la %r14,__LC_SYS_ENTER_TIMER(%r13)
+ clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_EXIT_TIMER(%r13)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
- jnz 6f
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+ jnz .Lmcck_user
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
- OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f
- OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
- j 5f
-4: CHKSTG .Lmcck_panic
-5: larl %r14,.Lstosm_tmp
- stosm 0(%r14),0x04 # turn dat on, keep irqs off
- BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
- SIEEXIT
- j .Lmcck_stack
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
+ jz .Lmcck_user
+ # Need to compare the address instead of the __TI_sie flag.
+ # Otherwise there would be a race between setting the flag
+ # and entering SIE (or leaving and clearing the flag). This
+ # would cause machine checks targeted at the guest to be
+ # handled by the host.
+ larl %r14,.Lsie_entry
+ clgrjl %r9,%r14, 4f
+ larl %r14,.Lsie_leave
+ clgrjhe %r9,%r14, 4f
+ lg %r10,__LC_PCPU(%r13)
+ oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
+4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-6: CHKSTG .Lmcck_panic
- larl %r14,.Lstosm_tmp
- stosm 0(%r14),0x04 # turn dat on, keep irqs off
- tmhh %r8,0x0001 # interrupting from user ?
- jz .Lmcck_stack
- BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-.Lmcck_stack:
- lg %r15,__LC_MCCK_STACK
+.Lmcck_user:
+ lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
- stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r14,__LC_GPREGS_SAVE_AREA+64
- stmg %r0,%r7,__PT_R0(%r11)
+ lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
+ mvc __PT_R0(128,%r11),0(%r14)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -584,30 +479,19 @@ ENTRY(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
- mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- cghi %r2,0
- je .Lmcck_return
- lg %r1,__LC_KERNEL_STACK # switch to kernel stack
- mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
- xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
- la %r11,STACK_FRAME_OVERHEAD(%r1)
- lgr %r2,%r11
- lgr %r15,%r1
- brasl %r14,s390_handle_mcck
-.Lmcck_return:
- lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
- BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
- stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+ BPON
+ stpt __LC_EXIT_TIMER(%r13)
+0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+ ALT_FACILITY(193)
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -621,10 +505,10 @@ ENTRY(mcck_int_handler)
*/
lhi %r5,0
lhi %r6,1
- larl %r7,.Lstop_lock
+ larl %r7,stop_lock
cs %r5,%r6,0(%r7) # single CPU-stopper only
jnz 4f
- larl %r7,.Lthis_cpu
+ larl %r7,this_cpu
stap 0(%r7) # this CPU address
lh %r4,0(%r7)
nilh %r4,0
@@ -640,26 +524,28 @@ ENTRY(mcck_int_handler)
3: sigp %r1,%r4,SIGP_STOP # stop this CPU
brc SIGP_CC_BUSY,3b
4: j 4b
-ENDPROC(mcck_int_handler)
+SYM_CODE_END(mcck_int_handler)
-ENTRY(restart_int_handler)
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+SYM_CODE_START(restart_int_handler)
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
- la %r15,4095
- lctlg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r15)
-0: larl %r15,.Lstosm_tmp
- stosm 0(%r15),0x04 # turn dat on, keep irqs off
- lg %r15,__LC_RESTART_STACK
+ lctlg %c0,%c15,__LC_CREGS_SAVE_AREA
+0: larl %r15,daton_psw
+ lpswe 0(%r15) # turn dat on, keep irqs off
+.Ldaton:
+ GET_LC %r15
+ lg %r15,__LC_RESTART_STACK(%r15)
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+ GET_LC %r13
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
- lg %r2,__LC_RESTART_DATA
- lgf %r3,__LC_RESTART_SOURCE
+ lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA(%r13)
+ lgf %r3,__LC_RESTART_SOURCE(%r13)
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
@@ -670,46 +556,70 @@ ENTRY(restart_int_handler)
2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
brc 2,2b
3: j 3b
-ENDPROC(restart_int_handler)
+SYM_CODE_END(restart_int_handler)
+
+ __INIT
+SYM_CODE_START(early_pgm_check_handler)
+ STMG_LC %r8,%r15,__LC_SAVE_AREA
+ GET_LC %r13
+ aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW(%r13)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
+ lgr %r2,%r11
+ brasl %r14,__do_early_pgm_check
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+ LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
+SYM_CODE_END(early_pgm_check_handler)
+ __FINIT
.section .kprobes.text, "ax"
-#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
- * The synchronous or the asynchronous stack overflowed. We are dead.
+ * The synchronous or the asynchronous stack pointer is invalid. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
-ENTRY(stack_overflow)
- lg %r15,__LC_NODAT_STACK # change to panic stack
+SYM_CODE_START(stack_invalid)
+ GET_LC %r15
+ lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_R8(64,%r11),0(%r14)
- stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ GET_LC %r2
+ mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
- jg kernel_stack_overflow
-ENDPROC(stack_overflow)
-#endif
+ jg kernel_stack_invalid
+SYM_CODE_END(stack_invalid)
.section .data, "aw"
- .align 4
-.Lstop_lock: .long 0
-.Lthis_cpu: .short 0
-.Lstosm_tmp: .byte 0
+ .balign 4
+SYM_DATA_LOCAL(stop_lock, .long 0)
+SYM_DATA_LOCAL(this_cpu, .short 0)
+ .balign 8
+SYM_DATA_START_LOCAL(daton_psw)
+ .quad PSW_KERNEL_BITS
+ .quad .Ldaton
+SYM_DATA_END(daton_psw)
+
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
- .globl sys_call_table
-sys_call_table:
-#include "asm/syscall_table.h"
+SYM_DATA_START(sys_call_table)
+#include <asm/syscall_table.h>
+SYM_DATA_END(sys_call_table)
#undef SYSCALL
#ifdef CONFIG_COMPAT
#define SYSCALL(esame,emu) .quad __s390_ ## emu
- .globl sys_call_table_emu
-sys_call_table_emu:
-#include "asm/syscall_table.h"
+SYM_DATA_START(sys_call_table_emu)
+#include <asm/syscall_table.h>
+SYM_DATA_END(sys_call_table_emu)
#undef SYSCALL
#endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 995ec7449feb..dd55cc6bbc28 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -19,6 +19,7 @@ void mcck_int_handler(void);
void restart_int_handler(void);
void early_pgm_check_handler(void);
+struct task_struct *__switch_to_asm(struct task_struct *prev, struct task_struct *next);
void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs);
void __do_pgm_check(struct pt_regs *regs);
void __do_syscall(struct pt_regs *regs, int per_trap);
@@ -30,19 +31,16 @@ void do_secure_storage_access(struct pt_regs *regs);
void do_non_secure_storage_access(struct pt_regs *regs);
void do_secure_storage_violation(struct pt_regs *regs);
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
-void kernel_stack_overflow(struct pt_regs * regs);
+void kernel_stack_invalid(struct pt_regs *regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
-void __init init_IRQ(void);
void do_io_irq(struct pt_regs *regs);
void do_ext_irq(struct pt_regs *regs);
void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-void __init time_init(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
@@ -73,6 +71,5 @@ extern struct exception_table_entry _stop_amode31_ex_table[];
#define __amode31_data __section(".amode31.data")
#define __amode31_ref __section(".amode31.refs")
extern long _start_amode31_refs[], _end_amode31_refs[];
-extern unsigned long __amode31_base;
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
new file mode 100644
index 000000000000..f02127219a27
--- /dev/null
+++ b/arch/s390/kernel/facility.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2023
+ */
+
+#include <asm/facility.h>
+
+unsigned int stfle_size(void)
+{
+ static unsigned int size;
+ unsigned int r;
+ u64 dummy;
+
+ r = READ_ONCE(size);
+ if (!r) {
+ r = __stfle_asm(&dummy, 1) + 1;
+ WRITE_ONCE(size, r);
+ }
+ return r;
+}
+EXPORT_SYMBOL(stfle_size);
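
stfle_size() above computes the facility-list length once and caches it in a function-local static via READ_ONCE()/WRITE_ONCE(); concurrent first callers may race, but they all store the same value, so no lock is needed. The same idiom outside the kernel looks roughly like the sketch below, with a made-up expensive_probe() as the one-time query and C11 relaxed atomics standing in for READ_ONCE()/WRITE_ONCE().

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for the one-time, comparatively expensive hardware query. */
static unsigned int expensive_probe(void)
{
	return 32;	/* assumed result, for illustration only */
}

static unsigned int cached_size(void)
{
	static _Atomic unsigned int size;
	unsigned int r;

	r = atomic_load_explicit(&size, memory_order_relaxed);
	if (!r) {
		/* Benign race: every caller computes and stores the same value. */
		r = expensive_probe();
		atomic_store_explicit(&size, r, memory_order_relaxed);
	}
	return r;
}

int main(void)
{
	printf("%u\n", cached_size());	/* probes */
	printf("%u\n", cached_size());	/* cached */
	return 0;
}
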
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index d864c9a325e2..6f2e87920288 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -8,258 +8,186 @@
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/sched.h>
-#include <asm/fpu/types.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
-asm(".include \"asm/vx-insn.h\"\n");
-
-void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
{
+ __vector128 *vxrs = state->vxrs;
+ int mask;
+
/*
* Limit the save to the FPU/vector registers already
- * in use by the previous context
+ * in use by the previous context.
*/
- flags &= state->mask;
-
+ flags &= state->hdr.mask;
if (flags & KERNEL_FPC)
- /* Save floating point control */
- asm volatile("stfpc %0" : "=Q" (state->fpc));
-
- if (!MACHINE_HAS_VX) {
- if (flags & KERNEL_VXR_V0V7) {
- /* Save floating-point registers */
- asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
- asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
- asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
- asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
- asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
- asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
- asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
- asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
- asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
- asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
- asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
- asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
- asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
- asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
- asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
- asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
- }
+ fpu_stfpc(&state->hdr.fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ save_fp_regs_vx(vxrs);
return;
}
-
- /* Test and save vector registers */
- asm volatile (
- /*
- * Test if any vector register must be saved and, if so,
- * test if all register can be saved.
- */
- " la 1,%[vxrs]\n" /* load save area */
- " tmll %[m],30\n" /* KERNEL_VXR */
- " jz 7f\n" /* no work -> done */
- " jo 5f\n" /* -> save V0..V31 */
- /*
- * Test for special case KERNEL_FPU_MID only. In this
- * case a vstm V8..V23 is the best instruction
- */
- " chi %[m],12\n" /* KERNEL_VXR_MID */
- " jne 0f\n" /* -> save V8..V23 */
- " VSTM 8,23,128,1\n" /* vstm %v8,%v23,128(%r1) */
- " j 7f\n"
- /* Test and save the first half of 16 vector registers */
- "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */
- " jz 3f\n" /* -> KERNEL_VXR_HIGH */
- " jo 2f\n" /* 11 -> save V0..V15 */
- " brc 2,1f\n" /* 10 -> save V8..V15 */
- " VSTM 0,7,0,1\n" /* vstm %v0,%v7,0(%r1) */
- " j 3f\n"
- "1: VSTM 8,15,128,1\n" /* vstm %v8,%v15,128(%r1) */
- " j 3f\n"
- "2: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */
- /* Test and save the second half of 16 vector registers */
- "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */
- " jz 7f\n"
- " jo 6f\n" /* 11 -> save V16..V31 */
- " brc 2,4f\n" /* 10 -> save V24..V31 */
- " VSTM 16,23,256,1\n" /* vstm %v16,%v23,256(%r1) */
- " j 7f\n"
- "4: VSTM 24,31,384,1\n" /* vstm %v24,%v31,384(%r1) */
- " j 7f\n"
- "5: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */
- "6: VSTM 16,31,256,1\n" /* vstm %v16,%v31,256(%r1) */
- "7:"
- : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
- : [m] "d" (flags)
- : "1", "cc");
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ vxrs += fpu_vstm(0, 15, vxrs);
+ vxrs += fpu_vstm(16, 31, vxrs);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ vxrs += fpu_vstm(8, 23, vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ vxrs += fpu_vstm(0, 15, vxrs);
+ else if (mask == KERNEL_VXR_V0V7)
+ vxrs += fpu_vstm(0, 7, vxrs);
+ else
+ vxrs += fpu_vstm(8, 15, vxrs);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ vxrs += fpu_vstm(16, 31, vxrs);
+ else if (mask == KERNEL_VXR_V16V23)
+ vxrs += fpu_vstm(16, 23, vxrs);
+ else
+ vxrs += fpu_vstm(24, 31, vxrs);
+ }
}
EXPORT_SYMBOL(__kernel_fpu_begin);
-void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
+void __kernel_fpu_end(struct kernel_fpu *state, int flags)
{
+ __vector128 *vxrs = state->vxrs;
+ int mask;
+
/*
* Limit the restore to the FPU/vector registers of the
- * previous context that have been overwritte by the
- * current context
+ * previous context that have been overwritten by the
+ * current context.
*/
- flags &= state->mask;
-
+ flags &= state->hdr.mask;
if (flags & KERNEL_FPC)
- /* Restore floating-point controls */
- asm volatile("lfpc %0" : : "Q" (state->fpc));
-
- if (!MACHINE_HAS_VX) {
- if (flags & KERNEL_VXR_V0V7) {
- /* Restore floating-point registers */
- asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
- asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
- asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
- asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
- asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
- asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
- asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
- asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
- asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
- asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
- asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
- asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
- asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
- asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
- asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
- asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
- }
+ fpu_lfpc(&state->hdr.fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ load_fp_regs_vx(vxrs);
return;
}
-
- /* Test and restore (load) vector registers */
- asm volatile (
- /*
- * Test if any vector register must be loaded and, if so,
- * test if all registers can be loaded at once.
- */
- " la 1,%[vxrs]\n" /* load restore area */
- " tmll %[m],30\n" /* KERNEL_VXR */
- " jz 7f\n" /* no work -> done */
- " jo 5f\n" /* -> restore V0..V31 */
- /*
- * Test for special case KERNEL_FPU_MID only. In this
- * case a vlm V8..V23 is the best instruction
- */
- " chi %[m],12\n" /* KERNEL_VXR_MID */
- " jne 0f\n" /* -> restore V8..V23 */
- " VLM 8,23,128,1\n" /* vlm %v8,%v23,128(%r1) */
- " j 7f\n"
- /* Test and restore the first half of 16 vector registers */
- "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */
- " jz 3f\n" /* -> KERNEL_VXR_HIGH */
- " jo 2f\n" /* 11 -> restore V0..V15 */
- " brc 2,1f\n" /* 10 -> restore V8..V15 */
- " VLM 0,7,0,1\n" /* vlm %v0,%v7,0(%r1) */
- " j 3f\n"
- "1: VLM 8,15,128,1\n" /* vlm %v8,%v15,128(%r1) */
- " j 3f\n"
- "2: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */
- /* Test and restore the second half of 16 vector registers */
- "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */
- " jz 7f\n"
- " jo 6f\n" /* 11 -> restore V16..V31 */
- " brc 2,4f\n" /* 10 -> restore V24..V31 */
- " VLM 16,23,256,1\n" /* vlm %v16,%v23,256(%r1) */
- " j 7f\n"
- "4: VLM 24,31,384,1\n" /* vlm %v24,%v31,384(%r1) */
- " j 7f\n"
- "5: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */
- "6: VLM 16,31,256,1\n" /* vlm %v16,%v31,256(%r1) */
- "7:"
- : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
- : [m] "d" (flags)
- : "1", "cc");
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ vxrs += fpu_vlm(0, 15, vxrs);
+ vxrs += fpu_vlm(16, 31, vxrs);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ vxrs += fpu_vlm(8, 23, vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ vxrs += fpu_vlm(0, 15, vxrs);
+ else if (mask == KERNEL_VXR_V0V7)
+ vxrs += fpu_vlm(0, 7, vxrs);
+ else
+ vxrs += fpu_vlm(8, 15, vxrs);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ vxrs += fpu_vlm(16, 31, vxrs);
+ else if (mask == KERNEL_VXR_V16V23)
+ vxrs += fpu_vlm(16, 23, vxrs);
+ else
+ vxrs += fpu_vlm(24, 31, vxrs);
+ }
}
EXPORT_SYMBOL(__kernel_fpu_end);
-void __load_fpu_regs(void)
+void load_fpu_state(struct fpu *state, int flags)
{
- struct fpu *state = &current->thread.fpu;
- unsigned long *regs = current->thread.fpu.regs;
+ __vector128 *vxrs = &state->vxrs[0];
+ int mask;
- asm volatile("lfpc %0" : : "Q" (state->fpc));
- if (likely(MACHINE_HAS_VX)) {
- asm volatile("lgr 1,%0\n"
- "VLM 0,15,0,1\n"
- "VLM 16,31,256,1\n"
- :
- : "d" (regs)
- : "1", "cc", "memory");
- } else {
- asm volatile("ld 0,%0" : : "Q" (regs[0]));
- asm volatile("ld 1,%0" : : "Q" (regs[1]));
- asm volatile("ld 2,%0" : : "Q" (regs[2]));
- asm volatile("ld 3,%0" : : "Q" (regs[3]));
- asm volatile("ld 4,%0" : : "Q" (regs[4]));
- asm volatile("ld 5,%0" : : "Q" (regs[5]));
- asm volatile("ld 6,%0" : : "Q" (regs[6]));
- asm volatile("ld 7,%0" : : "Q" (regs[7]));
- asm volatile("ld 8,%0" : : "Q" (regs[8]));
- asm volatile("ld 9,%0" : : "Q" (regs[9]));
- asm volatile("ld 10,%0" : : "Q" (regs[10]));
- asm volatile("ld 11,%0" : : "Q" (regs[11]));
- asm volatile("ld 12,%0" : : "Q" (regs[12]));
- asm volatile("ld 13,%0" : : "Q" (regs[13]));
- asm volatile("ld 14,%0" : : "Q" (regs[14]));
- asm volatile("ld 15,%0" : : "Q" (regs[15]));
+ if (flags & KERNEL_FPC)
+ fpu_lfpc_safe(&state->fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_V0V7)
+ load_fp_regs_vx(state->vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ fpu_vlm(0, 15, &vxrs[0]);
+ fpu_vlm(16, 31, &vxrs[16]);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ fpu_vlm(8, 23, &vxrs[8]);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ fpu_vlm(0, 15, &vxrs[0]);
+ else if (mask == KERNEL_VXR_V0V7)
+ fpu_vlm(0, 7, &vxrs[0]);
+ else
+ fpu_vlm(8, 15, &vxrs[8]);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ fpu_vlm(16, 31, &vxrs[16]);
+ else if (mask == KERNEL_VXR_V16V23)
+ fpu_vlm(16, 23, &vxrs[16]);
+ else
+ fpu_vlm(24, 31, &vxrs[24]);
}
- clear_cpu_flag(CIF_FPU);
-}
-EXPORT_SYMBOL(__load_fpu_regs);
-
-void load_fpu_regs(void)
-{
- raw_local_irq_disable();
- __load_fpu_regs();
- raw_local_irq_enable();
}
-EXPORT_SYMBOL(load_fpu_regs);
-void save_fpu_regs(void)
+void save_fpu_state(struct fpu *state, int flags)
{
- unsigned long flags, *regs;
- struct fpu *state;
-
- local_irq_save(flags);
+ __vector128 *vxrs = &state->vxrs[0];
+ int mask;
- if (test_cpu_flag(CIF_FPU))
- goto out;
-
- state = &current->thread.fpu;
- regs = current->thread.fpu.regs;
-
- asm volatile("stfpc %0" : "=Q" (state->fpc));
- if (likely(MACHINE_HAS_VX)) {
- asm volatile("lgr 1,%0\n"
- "VSTM 0,15,0,1\n"
- "VSTM 16,31,256,1\n"
- :
- : "d" (regs)
- : "1", "cc", "memory");
- } else {
- asm volatile("std 0,%0" : "=Q" (regs[0]));
- asm volatile("std 1,%0" : "=Q" (regs[1]));
- asm volatile("std 2,%0" : "=Q" (regs[2]));
- asm volatile("std 3,%0" : "=Q" (regs[3]));
- asm volatile("std 4,%0" : "=Q" (regs[4]));
- asm volatile("std 5,%0" : "=Q" (regs[5]));
- asm volatile("std 6,%0" : "=Q" (regs[6]));
- asm volatile("std 7,%0" : "=Q" (regs[7]));
- asm volatile("std 8,%0" : "=Q" (regs[8]));
- asm volatile("std 9,%0" : "=Q" (regs[9]));
- asm volatile("std 10,%0" : "=Q" (regs[10]));
- asm volatile("std 11,%0" : "=Q" (regs[11]));
- asm volatile("std 12,%0" : "=Q" (regs[12]));
- asm volatile("std 13,%0" : "=Q" (regs[13]));
- asm volatile("std 14,%0" : "=Q" (regs[14]));
- asm volatile("std 15,%0" : "=Q" (regs[15]));
+ if (flags & KERNEL_FPC)
+ fpu_stfpc(&state->fpc);
+ if (!cpu_has_vx()) {
+ if (flags & KERNEL_VXR_LOW)
+ save_fp_regs_vx(state->vxrs);
+ return;
+ }
+ mask = flags & KERNEL_VXR;
+ if (mask == KERNEL_VXR) {
+ fpu_vstm(0, 15, &vxrs[0]);
+ fpu_vstm(16, 31, &vxrs[16]);
+ return;
+ }
+ if (mask == KERNEL_VXR_MID) {
+ fpu_vstm(8, 23, &vxrs[8]);
+ return;
+ }
+ mask = flags & KERNEL_VXR_LOW;
+ if (mask) {
+ if (mask == KERNEL_VXR_LOW)
+ fpu_vstm(0, 15, &vxrs[0]);
+ else if (mask == KERNEL_VXR_V0V7)
+ fpu_vstm(0, 7, &vxrs[0]);
+ else
+ fpu_vstm(8, 15, &vxrs[8]);
+ }
+ mask = flags & KERNEL_VXR_HIGH;
+ if (mask) {
+ if (mask == KERNEL_VXR_HIGH)
+ fpu_vstm(16, 31, &vxrs[16]);
+ else if (mask == KERNEL_VXR_V16V23)
+ fpu_vstm(16, 23, &vxrs[16]);
+ else
+ fpu_vstm(24, 31, &vxrs[24]);
}
- set_cpu_flag(CIF_FPU);
-out:
- local_irq_restore(flags);
}
-EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(save_fpu_state);
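The rework above replaces the old all-or-nothing register save/restore with flag masks, so callers only touch the registers they actually clobber. A minimal caller sketch, illustration only: it assumes just the two functions and the KERNEL_* flag names from this hunk, and the example function name is made up.

	/*
	 * Illustration only: save FPC plus vector registers V0-V7, run code
	 * that clobbers them, then restore the same subset.
	 */
	static void example_clobber_low_vxrs(struct fpu *state)
	{
		save_fpu_state(state, KERNEL_FPC | KERNEL_VXR_V0V7);
		/* ... code using FPC and V0-V7 ... */
		load_fpu_state(state, KERNEL_FPC | KERNEL_VXR_V0V7);
	}

On machines without the vector facility the !cpu_has_vx() branches above fall back to save_fp_regs_vx()/load_fp_regs_vx() for the floating point registers instead.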
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 416b5a94353d..e94bb98f5231 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,15 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/kmsan-checks.h>
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/text-patching.h>
@@ -49,30 +51,6 @@ struct ftrace_insn {
s32 disp;
} __packed;
-asm(
- " .align 16\n"
- "ftrace_shared_hotpatch_trampoline_br:\n"
- " lmg %r0,%r1,2(%r1)\n"
- " br %r1\n"
- "ftrace_shared_hotpatch_trampoline_br_end:\n"
-);
-
-#ifdef CONFIG_EXPOLINE
-asm(
- " .align 16\n"
- "ftrace_shared_hotpatch_trampoline_exrl:\n"
- " lmg %r0,%r1,2(%r1)\n"
- " exrl %r0,0f\n"
- " j .\n"
- "0: br %r1\n"
- "ftrace_shared_hotpatch_trampoline_exrl_end:\n"
-);
-#endif /* CONFIG_EXPOLINE */
-
-#ifdef CONFIG_MODULES
-static char *ftrace_plt;
-#endif /* CONFIG_MODULES */
-
static const char *ftrace_shared_hotpatch_trampoline(const char **end)
{
const char *tstart, *tend;
@@ -92,19 +70,20 @@ static const char *ftrace_shared_hotpatch_trampoline(const char **end)
bool ftrace_need_init_nop(void)
{
- return true;
+ return !cpu_has_seq_insn();
}
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline =
__ftrace_hotpatch_trampolines_start;
- static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 };
+ static const struct ftrace_insn orig = { .opc = 0xc004, .disp = 0 };
static struct ftrace_hotpatch_trampoline *trampoline;
struct ftrace_hotpatch_trampoline **next_trampoline;
struct ftrace_hotpatch_trampoline *trampolines_end;
struct ftrace_hotpatch_trampoline tmp;
struct ftrace_insn *insn;
+ struct ftrace_insn old;
const char *shared;
s32 disp;
@@ -118,7 +97,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
if (mod) {
next_trampoline = &mod->arch.next_trampoline;
trampolines_end = mod->arch.trampolines_end;
- shared = ftrace_plt;
}
#endif
@@ -126,8 +104,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
return -ENOMEM;
trampoline = (*next_trampoline)++;
+ if (copy_from_kernel_nofault(&old, (void *)rec->ip, sizeof(old)))
+ return -EFAULT;
/* Check for the compiler-generated fentry nop (brcl 0, .). */
- if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig))))
+ if (WARN_ON_ONCE(memcmp(&orig, &old, sizeof(old))))
return -EINVAL;
/* Generate the trampoline. */
@@ -163,8 +143,35 @@ static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrac
return trampoline;
}
-int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
- unsigned long addr)
+static inline struct ftrace_insn
+ftrace_generate_branch_insn(unsigned long ip, unsigned long target)
+{
+ /* brasl r0,target or brcl 0,0 */
+ return (struct ftrace_insn){ .opc = target ? 0xc005 : 0xc004,
+ .disp = target ? (target - ip) / 2 : 0 };
+}
+
+static int ftrace_patch_branch_insn(unsigned long ip, unsigned long old_target,
+ unsigned long target)
+{
+ struct ftrace_insn orig = ftrace_generate_branch_insn(ip, old_target);
+ struct ftrace_insn new = ftrace_generate_branch_insn(ip, target);
+ struct ftrace_insn old;
+
+ if (!IS_ALIGNED(ip, 8))
+ return -EINVAL;
+ if (copy_from_kernel_nofault(&old, (void *)ip, sizeof(old)))
+ return -EFAULT;
+ /* Verify that the code to be replaced matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *)ip, &new, sizeof(new));
+ return 0;
+}
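A brief worked example of the encoding above, with made-up addresses; s390 relative branch instructions store their target as a signed offset in halfwords, hence the division by 2.

	/*
	 * Illustration only: for ip = 0x4000 and target = 0x5000,
	 * ftrace_generate_branch_insn() yields opc = 0xc005 (brasl %r0,...)
	 * and disp = (0x5000 - 0x4000) / 2 = 0x800 halfwords; a zero target
	 * yields the brcl 0,0 nop (opc = 0xc004, disp = 0).
	 * ftrace_patch_branch_insn() only writes the new instruction if the
	 * current one matches what is expected for old_target.
	 */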
+
+static int ftrace_modify_trampoline_call(struct dyn_ftrace *rec,
+ unsigned long old_addr,
+ unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
u64 old;
@@ -180,6 +187,15 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
return 0;
}
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, old_addr, addr);
+ else
+ return ftrace_modify_trampoline_call(rec, old_addr, addr);
+}
+
static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
{
u16 old;
@@ -198,11 +214,14 @@ static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable)
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- /* Expect brcl 0xf,... */
- return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
+ /* Expect brcl 0xf,... for the !cpu_has_seq_insn() case */
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, addr, 0);
+ else
+ return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false);
}
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static int ftrace_make_trampoline_call(struct dyn_ftrace *rec, unsigned long addr)
{
struct ftrace_hotpatch_trampoline *trampoline;
@@ -214,6 +233,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true);
}
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ if (cpu_has_seq_insn())
+ return ftrace_patch_branch_insn(rec->ip, 0, addr);
+ else
+ return ftrace_make_trampoline_call(rec, addr);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
ftrace_func = func;
@@ -234,75 +261,20 @@ void ftrace_arch_code_modify_post_process(void)
text_poke_sync_lock();
}
-#ifdef CONFIG_MODULES
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int __init ftrace_plt_init(void)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- const char *start, *end;
-
- ftrace_plt = module_alloc(PAGE_SIZE);
- if (!ftrace_plt)
- panic("cannot allocate ftrace plt\n");
-
- start = ftrace_shared_hotpatch_trampoline(&end);
- memcpy(ftrace_plt, start, end - start);
- set_memory_ro((unsigned long)ftrace_plt, 1);
- return 0;
-}
-device_initcall(ftrace_plt_init);
-
-#endif /* CONFIG_MODULES */
+ unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
+ unsigned long sp = arch_ftrace_regs(fregs)->regs.gprs[15];
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-/*
- * Hook the return address and push it in the stack of return addresses
- * in current thread info.
- */
-unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
- unsigned long ip)
-{
if (unlikely(ftrace_graph_is_dead()))
- goto out;
+ return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
- goto out;
- ip -= MCOUNT_INSN_SIZE;
- if (!function_graph_enter(ra, ip, 0, (void *) sp))
- ra = (unsigned long) return_to_handler;
-out:
- return ra;
-}
-NOKPROBE_SYMBOL(prepare_ftrace_return);
-
-/*
- * Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative on condition. To enable the ftrace graph code
- * block, we simply patch the mask field of the instruction to zero and
- * turn the instruction into a nop.
- * To disable the ftrace graph code the mask field will be patched to
- * all ones, which turns the instruction into an unconditional branch.
- */
-int ftrace_enable_ftrace_graph_caller(void)
-{
- int rc;
-
- /* Expect brc 0xf,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- int rc;
-
- /* Expect brc 0x0,... */
- rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
- if (rc)
- return rc;
- text_poke_sync_lock();
- return 0;
+ return;
+ if (!function_graph_enter_regs(*parent, ip, 0, (unsigned long *)sp, fregs))
+ *parent = (unsigned long)&return_to_handler;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -316,10 +288,14 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
+ kmsan_unpoison_memory(fregs, ftrace_regs_size());
regs = ftrace_get_regs(fregs);
p = get_kprobe((kprobe_opcode_t *)ip);
if (!regs || unlikely(!p) || kprobe_disabled(p))
diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h
index 7f75a9616406..23337065f402 100644
--- a/arch/s390/kernel/ftrace.h
+++ b/arch/s390/kernel/ftrace.h
@@ -18,7 +18,5 @@ extern const char ftrace_shared_hotpatch_trampoline_br[];
extern const char ftrace_shared_hotpatch_trampoline_br_end[];
extern const char ftrace_shared_hotpatch_trampoline_exrl[];
extern const char ftrace_shared_hotpatch_trampoline_exrl_end[];
-extern const char ftrace_plt_template[];
-extern const char ftrace_plt_template_end[];
#endif /* _FTRACE_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
index d14dd1c2e524..cf26d7a37425 100644
--- a/arch/s390/kernel/guarded_storage.c
+++ b/arch/s390/kernel/guarded_storage.c
@@ -4,6 +4,7 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
+#include <linux/cpufeature.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
@@ -28,7 +29,7 @@ static int gs_enable(void)
return -ENOMEM;
gs_cb->gsd = 25;
preempt_disable();
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
load_gs_cb(gs_cb);
current->thread.gs_cb = gs_cb;
preempt_enable();
@@ -42,7 +43,7 @@ static int gs_disable(void)
preempt_disable();
kfree(current->thread.gs_cb);
current->thread.gs_cb = NULL;
- __ctl_clear_bit(2, 4);
+ local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
preempt_enable();
}
return 0;
@@ -84,7 +85,7 @@ void gs_load_bc_cb(struct pt_regs *regs)
if (gs_cb) {
kfree(current->thread.gs_cb);
current->thread.gs_bc_cb = NULL;
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
load_gs_cb(gs_cb);
current->thread.gs_cb = gs_cb;
}
@@ -109,7 +110,7 @@ static int gs_broadcast(void)
SYSCALL_DEFINE2(s390_guarded_storage, int, command,
struct gs_cb __user *, gs_cb)
{
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -EOPNOTSUPP;
switch (command) {
case GS_ENABLE:
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index d7b8b6ad574d..7edb9ded199c 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -10,32 +10,31 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
__HEAD
-ENTRY(startup_continue)
- larl %r1,tod_clock_base
- mvc 0(16,%r1),__LC_BOOT_CLOCK
+SYM_CODE_START(startup_continue)
#
# Setup stack
#
+ GET_LC %r2
larl %r14,init_task
- stg %r14,__LC_CURRENT
- larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
-#ifdef CONFIG_KASAN
- brasl %r14,kasan_early_init
-#endif
+ stg %r14,__LC_CURRENT(%r2)
+ larl %r15,init_thread_union+STACK_INIT_OFFSET
+ stg %r15,__LC_KERNEL_STACK(%r2)
+ brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
brasl %r14,startup_init # s390 specific early init
brasl %r14,start_kernel # common init code
#
# We returned from start_kernel ?!? PANIK
#
basr %r13,0
- lpswe .Ldw-.(%r13) # load disabled wait psw
+ lpswe dw_psw-.(%r13) # load disabled wait psw
+SYM_CODE_END(startup_continue)
- .align 16
-.LPG1:
-.Ldw: .quad 0x0002000180000000,0x0000000000000000
+ .balign 16
+SYM_DATA_LOCAL(dw_psw, .quad 0x0002000180000000,0x0000000000000000)
diff --git a/arch/s390/kernel/hiperdispatch.c b/arch/s390/kernel/hiperdispatch.c
new file mode 100644
index 000000000000..e7b66d046e8d
--- /dev/null
+++ b/arch/s390/kernel/hiperdispatch.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ */
+
+#define KMSG_COMPONENT "hd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+/*
+ * Hiperdispatch:
+ * Dynamically calculates the optimum number of high capacity COREs
+ * by considering the state the system is in. When hiperdispatch decides
+ * that a capacity update is necessary, it schedules a topology update.
+ * During topology updates the CPU capacities are always re-adjusted.
+ *
+ * There are two places where CPU capacities are accessed within
+ * hiperdispatch:
+ * -> hiperdispatch's recurring work function reads CPU capacities to
+ * determine the high capacity CPU count.
+ * -> during a topology update hiperdispatch's adjustment function
+ * updates CPU capacities.
+ * These two can run on different CPUs in parallel, which can cause
+ * hiperdispatch to make wrong decisions. This can potentially cause
+ * some overhead by leading to extra rebuild_sched_domains() calls
+ * for correction. Access to capacities within hiperdispatch has to be
+ * serialized to prevent the overhead.
+ *
+ * Hiperdispatch decision making revolves around steal time.
+ * The HD_STEAL_THRESHOLD value is taken as a reference. Whenever steal time
+ * crosses the threshold value, hiperdispatch falls back to giving high
+ * capacities to entitled CPUs. When steal time drops below the
+ * threshold boundary, hiperdispatch utilizes all CPUs by giving all
+ * of them high capacity.
+ *
+ * The theory behind HD_STEAL_THRESHOLD is related to SMP thread
+ * performance. Comparing the throughput of:
+ * - a single CORE with N threads running N tasks
+ * - N separate COREs running N tasks,
+ * using individual COREs for individual tasks yields better
+ * performance. This performance difference is roughly ~30% (it can
+ * change between machine generations).
+ *
+ * Hiperdispatch tries to hint the scheduler to use individual COREs for
+ * each task, as long as steal time on those COREs is less than 30%,
+ * thereby delaying the throughput loss caused by using SMP threads.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/kernel_stat.h>
+#include <linux/kstrtox.h>
+#include <linux/ktime.h>
+#include <linux/sysctl.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/hiperdispatch.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+
+#define CREATE_TRACE_POINTS
+#include <asm/trace/hiperdispatch.h>
+
+#define HD_DELAY_FACTOR (4)
+#define HD_DELAY_INTERVAL (HZ / 4)
+#define HD_STEAL_THRESHOLD 30
+#define HD_STEAL_AVG_WEIGHT 16
+
+static cpumask_t hd_vl_coremask; /* Mask containing all vertical low COREs */
+static cpumask_t hd_vmvl_cpumask; /* Mask containing vertical medium and low CPUs */
+static int hd_high_capacity_cores; /* Current CORE count with high capacity */
+static int hd_entitled_cores; /* Total vertical high and medium CORE count */
+static int hd_online_cores; /* Current online CORE count */
+
+static unsigned long hd_previous_steal; /* Previous iteration's CPU steal timer total */
+static unsigned long hd_high_time; /* Total time spent while all cpus have high capacity */
+static unsigned long hd_low_time; /* Total time spent while vl cpus have low capacity */
+static atomic64_t hd_adjustments; /* Total occurrence count of hiperdispatch adjustments */
+
+static unsigned int hd_steal_threshold = HD_STEAL_THRESHOLD;
+static unsigned int hd_delay_factor = HD_DELAY_FACTOR;
+static int hd_enabled;
+
+static void hd_capacity_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(hd_capacity_work, hd_capacity_work_fn);
+
+static int hd_set_hiperdispatch_mode(int enable)
+{
+ if (!cpu_has_topology())
+ enable = 0;
+ if (hd_enabled == enable)
+ return 0;
+ hd_enabled = enable;
+ return 1;
+}
+
+void hd_reset_state(void)
+{
+ cpumask_clear(&hd_vl_coremask);
+ cpumask_clear(&hd_vmvl_cpumask);
+ hd_entitled_cores = 0;
+ hd_online_cores = 0;
+}
+
+void hd_add_core(int cpu)
+{
+ const struct cpumask *siblings;
+ int polarization;
+
+ hd_online_cores++;
+ polarization = smp_cpu_get_polarization(cpu);
+ siblings = topology_sibling_cpumask(cpu);
+ switch (polarization) {
+ case POLARIZATION_VH:
+ hd_entitled_cores++;
+ break;
+ case POLARIZATION_VM:
+ hd_entitled_cores++;
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ case POLARIZATION_VL:
+ cpumask_set_cpu(cpu, &hd_vl_coremask);
+ cpumask_or(&hd_vmvl_cpumask, &hd_vmvl_cpumask, siblings);
+ break;
+ }
+}
+
+/* Serialize update and read operations of debug counters. */
+static DEFINE_MUTEX(hd_counter_mutex);
+
+static void hd_update_times(void)
+{
+ static ktime_t prev;
+ ktime_t now;
+
+ /*
+ * Check if hiperdispatch is active; if not, set prev to 0.
+ * This makes it possible to differentiate the first update iteration after
+ * enabling hiperdispatch.
+ */
+ if (hd_entitled_cores == 0 || hd_enabled == 0) {
+ prev = ktime_set(0, 0);
+ return;
+ }
+ now = ktime_get();
+ if (ktime_after(prev, 0)) {
+ if (hd_high_capacity_cores == hd_online_cores)
+ hd_high_time += ktime_ms_delta(now, prev);
+ else
+ hd_low_time += ktime_ms_delta(now, prev);
+ }
+ prev = now;
+}
+
+static void hd_update_capacities(void)
+{
+ int cpu, upscaling_cores;
+ unsigned long capacity;
+
+ upscaling_cores = hd_high_capacity_cores - hd_entitled_cores;
+ capacity = upscaling_cores > 0 ? CPU_CAPACITY_HIGH : CPU_CAPACITY_LOW;
+ hd_high_capacity_cores = hd_entitled_cores;
+ for_each_cpu(cpu, &hd_vl_coremask) {
+ smp_set_core_capacity(cpu, capacity);
+ if (capacity != CPU_CAPACITY_HIGH)
+ continue;
+ hd_high_capacity_cores++;
+ upscaling_cores--;
+ if (upscaling_cores == 0)
+ capacity = CPU_CAPACITY_LOW;
+ }
+}
+
+void hd_disable_hiperdispatch(void)
+{
+ cancel_delayed_work_sync(&hd_capacity_work);
+ hd_high_capacity_cores = hd_online_cores;
+ hd_previous_steal = 0;
+}
+
+int hd_enable_hiperdispatch(void)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ mutex_unlock(&hd_counter_mutex);
+ if (hd_enabled == 0)
+ return 0;
+ if (hd_entitled_cores == 0)
+ return 0;
+ if (hd_online_cores <= hd_entitled_cores)
+ return 0;
+ mod_delayed_work(system_wq, &hd_capacity_work, HD_DELAY_INTERVAL * hd_delay_factor);
+ hd_update_capacities();
+ return 1;
+}
+
+static unsigned long hd_steal_avg(unsigned long new)
+{
+ static unsigned long steal;
+
+ steal = (steal * (HD_STEAL_AVG_WEIGHT - 1) + new) / HD_STEAL_AVG_WEIGHT;
+ return steal;
+}
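hd_steal_avg() keeps its state in a static local and implements an exponential moving average with weight 1/HD_STEAL_AVG_WEIGHT. A stateless sketch of the same formula, illustration only (the helper name is made up):

	static unsigned long example_ema16(unsigned long avg, unsigned long new)
	{
		/* (avg * (16 - 1) + new) / 16, i.e. HD_STEAL_AVG_WEIGHT == 16 */
		return (avg * 15 + new) / 16;
	}

Starting from 0, a single iteration with new = 48 yields only (0 * 15 + 48) / 16 = 3, so a short steal spike barely moves the average; a sustained value needs on the order of HD_STEAL_AVG_WEIGHT work-function iterations before it can push the average past hd_steal_threshold.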
+
+static unsigned long hd_calculate_steal_percentage(void)
+{
+ unsigned long time_delta, steal_delta, steal, percentage;
+ static ktime_t prev;
+ int cpus, cpu;
+ ktime_t now;
+
+ cpus = 0;
+ steal = 0;
+ percentage = 0;
+ for_each_cpu(cpu, &hd_vmvl_cpumask) {
+ steal += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ cpus++;
+ }
+ /*
+ * If there are no vertical medium and low CPUs, steal time
+ * is 0, as vertical high CPUs shouldn't experience steal time.
+ */
+ if (cpus == 0)
+ return percentage;
+ now = ktime_get();
+ time_delta = ktime_to_ns(ktime_sub(now, prev));
+ if (steal > hd_previous_steal && hd_previous_steal != 0) {
+ steal_delta = (steal - hd_previous_steal) * 100 / time_delta;
+ percentage = steal_delta / cpus;
+ }
+ hd_previous_steal = steal;
+ prev = now;
+ return percentage;
+}
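A worked example of the percentage calculation above, illustration only, assuming both the CPUTIME_STEAL counters and the ktime delta are in nanoseconds:

	/*
	 * cpus         = 2 (vertical medium/low)
	 * time_delta   = 250,000,000 ns (one HD_DELAY_INTERVAL, HZ/4 jiffies)
	 * steal growth = 150,000,000 ns (summed over both CPUs)
	 * steal_delta  = 150,000,000 * 100 / 250,000,000 = 60
	 * percentage   = 60 / 2 = 30, i.e. right at the default HD_STEAL_THRESHOLD
	 */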
+
+static void hd_capacity_work_fn(struct work_struct *work)
+{
+ unsigned long steal_percentage, new_cores;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ /*
+ * If the online core count is less than or equal to the entitled core
+ * count, hiperdispatch does not need to make any adjustments; schedule
+ * a topology update to disable hiperdispatch.
+ * Normally this check is handled on topology update, but during CPU
+ * hot unplug, topology and CPU mask updates are done in reverse
+ * order, causing hd_enable_hiperdispatch() to see stale data.
+ */
+ if (hd_online_cores <= hd_entitled_cores) {
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return;
+ }
+ steal_percentage = hd_steal_avg(hd_calculate_steal_percentage());
+ if (steal_percentage < hd_steal_threshold)
+ new_cores = hd_online_cores;
+ else
+ new_cores = hd_entitled_cores;
+ if (hd_high_capacity_cores != new_cores) {
+ trace_s390_hd_rebuild_domains(hd_high_capacity_cores, new_cores);
+ hd_high_capacity_cores = new_cores;
+ atomic64_inc(&hd_adjustments);
+ topology_schedule_update();
+ }
+ trace_s390_hd_work_fn(steal_percentage, hd_entitled_cores, hd_high_capacity_cores);
+ mutex_unlock(&smp_cpu_state_mutex);
+ schedule_delayed_work(&hd_capacity_work, HD_DELAY_INTERVAL);
+}
+
+static int hiperdispatch_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int hiperdispatch;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &hiperdispatch,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ hiperdispatch = hd_enabled;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ mutex_lock(&smp_cpu_state_mutex);
+ if (hd_set_hiperdispatch_mode(hiperdispatch))
+ topology_schedule_update();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return 0;
+}
+
+static const struct ctl_table hiperdispatch_ctl_table[] = {
+ {
+ .procname = "hiperdispatch",
+ .mode = 0644,
+ .proc_handler = hiperdispatch_ctl_handler,
+ },
+};
+
+static ssize_t hd_steal_threshold_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_steal_threshold);
+}
+
+static ssize_t hd_steal_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (val > 100)
+ return -ERANGE;
+ hd_steal_threshold = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_steal_threshold);
+
+static ssize_t hd_delay_factor_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", hd_delay_factor);
+}
+
+static ssize_t hd_delay_factor_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+ if (!val)
+ return -ERANGE;
+ hd_delay_factor = val;
+ return count;
+}
+
+static DEVICE_ATTR_RW(hd_delay_factor);
+
+static struct attribute *hd_attrs[] = {
+ &dev_attr_hd_steal_threshold.attr,
+ &dev_attr_hd_delay_factor.attr,
+ NULL,
+};
+
+static const struct attribute_group hd_attr_group = {
+ .name = "hiperdispatch",
+ .attrs = hd_attrs,
+};
+
+static int hd_greedy_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_high_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, hd_greedy_time_get, NULL, "%llu\n");
+
+static int hd_conservative_time_get(void *unused, u64 *val)
+{
+ mutex_lock(&hd_counter_mutex);
+ hd_update_times();
+ *val = hd_low_time;
+ mutex_unlock(&hd_counter_mutex);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_fops, hd_conservative_time_get, NULL, "%llu\n");
+
+static int hd_adjustment_count_get(void *unused, u64 *val)
+{
+ *val = atomic64_read(&hd_adjustments);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, hd_adjustment_count_get, NULL, "%llu\n");
+
+static void __init hd_create_debugfs_counters(void)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("hiperdispatch", arch_debugfs_dir);
+ debugfs_create_file("conservative_time_ms", 0400, dir, NULL, &hd_conservative_time_fops);
+ debugfs_create_file("greedy_time_ms", 0400, dir, NULL, &hd_greedy_time_fops);
+ debugfs_create_file("adjustment_count", 0400, dir, NULL, &hd_adjustments_fops);
+}
+
+static void __init hd_create_attributes(void)
+{
+ struct device *dev;
+
+ dev = bus_get_dev_root(&cpu_subsys);
+ if (!dev)
+ return;
+ if (sysfs_create_group(&dev->kobj, &hd_attr_group))
+ pr_warn("Unable to create hiperdispatch attribute group\n");
+ put_device(dev);
+}
+
+static int __init hd_init(void)
+{
+ if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON)) {
+ hd_set_hiperdispatch_mode(1);
+ topology_schedule_update();
+ }
+ if (!register_sysctl("s390", hiperdispatch_ctl_table))
+ pr_warn("Failed to register s390.hiperdispatch sysctl attribute\n");
+ hd_create_debugfs_counters();
+ hd_create_attributes();
+ return 0;
+}
+late_initcall(hd_init);
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 4bf1ee293f2b..39cb8d0ae348 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -12,9 +12,9 @@
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/cpu.h>
-#include <linux/sched/cputime.h>
#include <trace/events/power.h>
#include <asm/cpu_mf.h>
+#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include "entry.h"
@@ -24,117 +24,66 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
+ unsigned long idle_time;
u64 cycles_new[8];
int i;
- clear_cpu_flag(CIF_ENABLED_WAIT);
if (smp_cpu_mtid) {
stcctm(MT_DIAG, smp_cpu_mtid, cycles_new);
for (i = 0; i < smp_cpu_mtid; i++)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle->clock_idle_exit = S390_lowcore.int_clock;
- idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
+ idle_time = lc->int_clock - idle->clock_idle_enter;
+
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
- S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = idle->clock_idle_exit;
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = idle->timer_idle_exit;
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
+ WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+ account_idle_time(cputime_to_nsecs(idle_time));
}
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
- unsigned long idle_time;
unsigned long psw_mask;
/* Wait for external, I/O or machine check interrupt. */
- psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
-
- /* psw_idle() returns with interrupts disabled. */
- psw_idle(idle, psw_mask);
-
- /* Account time spent with enabled wait psw loaded as idle time. */
- raw_write_seqcount_begin(&idle->seqcount);
- idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
- idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
- idle->idle_time += idle_time;
- idle->idle_count++;
- account_idle_time(cputime_to_nsecs(idle_time));
- raw_write_seqcount_end(&idle->seqcount);
- raw_local_irq_enable();
+ set_cpu_flag(CIF_ENABLED_WAIT);
+ if (smp_cpu_mtid)
+ stcctm(MT_DIAG, smp_cpu_mtid, (u64 *)&idle->mt_cycles_enter);
+ idle->clock_idle_enter = get_tod_clock_fast();
+ idle->timer_idle_enter = get_cpu_timer();
+ bpon();
+ __load_psw_mask(psw_mask);
}
static ssize_t show_idle_count(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long idle_count;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_count = READ_ONCE(idle->idle_count);
- if (READ_ONCE(idle->clock_idle_enter))
- idle_count++;
- } while (read_seqcount_retry(&idle->seqcount, seq));
- return sprintf(buf, "%lu\n", idle_count);
+
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
}
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
- unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_time = READ_ONCE(idle->idle_time);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- idle_time += in_idle;
- return sprintf(buf, "%lu\n", idle_time >> 12);
-}
-DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-u64 arch_cpu_idle_time(int cpu)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long now, idle_enter, idle_exit, in_idle;
- unsigned int seq;
-
- do {
- seq = read_seqcount_begin(&idle->seqcount);
- idle_enter = READ_ONCE(idle->clock_idle_enter);
- idle_exit = READ_ONCE(idle->clock_idle_exit);
- } while (read_seqcount_retry(&idle->seqcount, seq));
- in_idle = 0;
- now = get_tod_clock();
- if (idle_enter) {
- if (idle_exit) {
- in_idle = idle_exit - idle_enter;
- } else if (now > idle_enter) {
- in_idle = now - idle_enter;
- }
- }
- return cputime_to_nsecs(in_idle);
+ return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
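The ">> 12" above converts the TOD-format idle time to microseconds: in the z/Architecture TOD clock format bit 51 corresponds to one microsecond, so the low 12 bits carry sub-microsecond resolution. Illustration only, with a made-up value:

	/* A TOD delta of 0x3000 (12288) is 12288 >> 12 = 3 microseconds. */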
void arch_cpu_idle_enter(void)
{
@@ -144,7 +93,7 @@ void arch_cpu_idle_exit(void)
{
}
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
cpu_die();
}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1cc85b8ff42e..ff15f91affde 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/device.h>
#include <linux/delay.h>
+#include <linux/kstrtox.h>
#include <linux/panic_notifier.h>
#include <linux/reboot.h>
#include <linux/ctype.h>
@@ -19,7 +20,9 @@
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/ipl.h>
#include <asm/smp.h>
@@ -29,6 +32,7 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
@@ -38,6 +42,8 @@
#define IPL_UNKNOWN_STR "unknown"
#define IPL_CCW_STR "ccw"
+#define IPL_ECKD_STR "eckd"
+#define IPL_ECKD_DUMP_STR "eckd_dump"
#define IPL_FCP_STR "fcp"
#define IPL_FCP_DUMP_STR "fcp_dump"
#define IPL_NVME_STR "nvme"
@@ -45,6 +51,7 @@
#define IPL_NSS_STR "nss"
#define DUMP_CCW_STR "ccw"
+#define DUMP_ECKD_STR "eckd"
#define DUMP_FCP_STR "fcp"
#define DUMP_NVME_STR "nvme"
#define DUMP_NONE_STR "none"
@@ -91,6 +98,10 @@ static char *ipl_type_str(enum ipl_type type)
switch (type) {
case IPL_TYPE_CCW:
return IPL_CCW_STR;
+ case IPL_TYPE_ECKD:
+ return IPL_ECKD_STR;
+ case IPL_TYPE_ECKD_DUMP:
+ return IPL_ECKD_DUMP_STR;
case IPL_TYPE_FCP:
return IPL_FCP_STR;
case IPL_TYPE_FCP_DUMP:
@@ -112,6 +123,7 @@ enum dump_type {
DUMP_TYPE_CCW = 2,
DUMP_TYPE_FCP = 4,
DUMP_TYPE_NVME = 8,
+ DUMP_TYPE_ECKD = 16,
};
static char *dump_type_str(enum dump_type type)
@@ -121,6 +133,8 @@ static char *dump_type_str(enum dump_type type)
return DUMP_NONE_STR;
case DUMP_TYPE_CCW:
return DUMP_CCW_STR;
+ case DUMP_TYPE_ECKD:
+ return DUMP_ECKD_STR;
case DUMP_TYPE_FCP:
return DUMP_FCP_STR;
case DUMP_TYPE_NVME:
@@ -146,6 +160,7 @@ static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
static struct ipl_parameter_block *reipl_block_fcp;
static struct ipl_parameter_block *reipl_block_nvme;
static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_eckd;
static struct ipl_parameter_block *reipl_block_nss;
static struct ipl_parameter_block *reipl_block_actual;
@@ -154,20 +169,24 @@ static enum dump_type dump_type = DUMP_TYPE_NONE;
static struct ipl_parameter_block *dump_block_fcp;
static struct ipl_parameter_block *dump_block_nvme;
static struct ipl_parameter_block *dump_block_ccw;
+static struct ipl_parameter_block *dump_block_eckd;
static struct sclp_ipl_info sclp_ipl_info;
static bool reipl_nvme_clear;
static bool reipl_fcp_clear;
static bool reipl_ccw_clear;
+static bool reipl_eckd_clear;
-static inline int __diag308(unsigned long subcode, void *addr)
+static unsigned long os_info_flags;
+
+static inline int __diag308(unsigned long subcode, unsigned long addr)
{
union register_pair r1;
- r1.even = (unsigned long) addr;
+ r1.even = addr;
r1.odd = 0;
- asm volatile(
+ asm_inline volatile(
" diag %[r1],%[subcode],0x308\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
@@ -180,7 +199,7 @@ static inline int __diag308(unsigned long subcode, void *addr)
int diag308(unsigned long subcode, void *addr)
{
diag_stat_inc(DIAG_STAT_X308);
- return __diag308(subcode, addr);
+ return __diag308(subcode, addr ? virt_to_phys(addr) : 0);
}
EXPORT_SYMBOL_GPL(diag308);
@@ -191,7 +210,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
char *page) \
{ \
- return scnprintf(page, PAGE_SIZE, _format, ##args); \
+ return sysfs_emit(page, _format, ##args); \
}
#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
@@ -217,14 +236,14 @@ IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \
_ipl_blk.ssid, _ipl_blk.devno); \
IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name, (S_IRUGO | S_IWUSR), \
+ __ATTR(_name, 0644, \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store) \
#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL)
+ __ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL)
#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \
IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \
@@ -239,7 +258,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
return len; \
} \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ __ATTR(_name, 0644, \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
@@ -249,15 +268,74 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
struct kobj_attribute *attr, \
const char *buf, size_t len) \
{ \
- strncpy(_value, buf, sizeof(_value) - 1); \
+ if (len >= sizeof(_value)) \
+ return -E2BIG; \
+ len = strscpy(_value, buf); \
+ if ((ssize_t)len < 0) \
+ return len; \
strim(_value); \
return len; \
} \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
- __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ __ATTR(_name, 0644, \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
+#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t size = _ipl_block.scp_data_len; \
+ void *scp_data = _ipl_block.scp_data; \
+ \
+ return memory_read_from_buffer(buf, count, &off, \
+ scp_data, size); \
+}
+
+#define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \
+ struct kobject *kobj, \
+ const struct bin_attribute *attr, \
+ char *buf, loff_t off, \
+ size_t count) \
+{ \
+ size_t scpdata_len = count; \
+ size_t padding; \
+ \
+ if (off) \
+ return -EINVAL; \
+ \
+ memcpy(_ipl_block.scp_data, buf, count); \
+ if (scpdata_len % 8) { \
+ padding = 8 - (scpdata_len % 8); \
+ memset(_ipl_block.scp_data + scpdata_len, \
+ 0, padding); \
+ scpdata_len += padding; \
+ } \
+ \
+ _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \
+ _ipl_block.len = _ipl_bp0_len + scpdata_len; \
+ _ipl_block.scp_data_len = scpdata_len; \
+ \
+ return count; \
+}
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
+ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \
+ NULL, _size)
+
+#define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\
+IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \
+IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\
+static const struct bin_attribute sys_##_prefix##_scp_data_attr = \
+ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \
+ sys_##_prefix##_scp_data_store, _size)
+
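The store path above pads the written SCP data to a multiple of 8 bytes and bumps both length fields accordingly. A brief worked example, illustration only:

	/*
	 * A 13-byte write gives scpdata_len % 8 == 5, so 3 zero bytes of
	 * padding are appended, scp_data_len becomes 16, and the lengths
	 * become _ipl_bp_len + 16 and _ipl_bp0_len + 16 respectively.
	 */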
/*
* ipl section
*/
@@ -280,6 +358,11 @@ static __init enum ipl_type get_ipl_type(void)
return IPL_TYPE_NVME_DUMP;
else
return IPL_TYPE_NVME;
+ case IPL_PBT_ECKD:
+ if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP)
+ return IPL_TYPE_ECKD_DUMP;
+ else
+ return IPL_TYPE_ECKD;
}
return IPL_TYPE_UNKNOWN;
}
@@ -290,7 +373,7 @@ EXPORT_SYMBOL_GPL(ipl_info);
static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(ipl_info.type));
}
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
@@ -298,7 +381,7 @@ static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
static ssize_t ipl_secure_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%i\n", !!ipl_secure_flag);
+ return sysfs_emit(page, "%i\n", !!ipl_secure_flag);
}
static struct kobj_attribute sys_ipl_secure_attr =
@@ -307,7 +390,7 @@ static struct kobj_attribute sys_ipl_secure_attr =
static ssize_t ipl_has_secure_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%i\n", !!sclp.has_sipl);
+ return sysfs_emit(page, "%i\n", !!sclp.has_sipl);
}
static struct kobj_attribute sys_ipl_has_secure_attr =
@@ -320,79 +403,69 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj,
if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
- return sprintf(page, "%s\n", parm);
+ return sysfs_emit(page, "%s\n", parm);
}
static struct kobj_attribute sys_ipl_vm_parm_attr =
- __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+ __ATTR(parm, 0444, ipl_vm_parm_show, NULL);
static ssize_t sys_ipl_device_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
- ipl_block.ccw.devno);
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+ ipl_block.ccw.devno);
+ case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
+ return sysfs_emit(page, "0.%x.%04x\n", ipl_block.eckd.ssid,
+ ipl_block.eckd.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+ return sysfs_emit(page, "0.0.%04x\n", ipl_block.fcp.devno);
case IPL_TYPE_NVME:
case IPL_TYPE_NVME_DUMP:
- return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
+ return sysfs_emit(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
}
}
static struct kobj_attribute sys_ipl_device_attr =
- __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+ __ATTR(device, 0444, sys_ipl_device_show, NULL);
-static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
+static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
{
return memory_read_from_buffer(buf, count, &off, &ipl_block,
ipl_block.hdr.len);
}
-static struct bin_attribute ipl_parameter_attr =
- __BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
+static const struct bin_attribute sys_ipl_parameter_attr =
+ __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL,
PAGE_SIZE);
-static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
-{
- unsigned int size = ipl_block.fcp.scp_data_len;
- void *scp_data = &ipl_block.fcp.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
- loff_t off, size_t count)
-{
- unsigned int size = ipl_block.nvme.scp_data_len;
- void *scp_data = &ipl_block.nvme.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE);
-static struct bin_attribute ipl_scp_data_attr =
- __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+static const struct bin_attribute *const ipl_fcp_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_fcp_scp_data_attr,
+ NULL,
+};
-static struct bin_attribute ipl_nvme_scp_data_attr =
- __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE);
-static struct bin_attribute *ipl_fcp_bin_attrs[] = {
- &ipl_parameter_attr,
- &ipl_scp_data_attr,
+static const struct bin_attribute *const ipl_nvme_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_nvme_scp_data_attr,
NULL,
};
-static struct bin_attribute *ipl_nvme_bin_attrs[] = {
- &ipl_parameter_attr,
- &ipl_nvme_scp_data_attr,
+DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE);
+
+static const struct bin_attribute *const ipl_eckd_bin_attrs[] = {
+ &sys_ipl_parameter_attr,
+ &sys_ipl_eckd_scp_data_attr,
NULL,
};
@@ -417,103 +490,180 @@ DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
(unsigned long long)ipl_block.nvme.br_lba);
+/* ECKD ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.eckd.bootprog);
+
+#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb) \
+static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *buf) \
+{ \
+ struct ipl_pb0_eckd *ipb = &(_ipb); \
+ \
+ if (!ipb->br_chr.cyl && \
+ !ipb->br_chr.head && \
+ !ipb->br_chr.record) \
+ return sysfs_emit(buf, "auto\n"); \
+ \
+ return sysfs_emit(buf, "0x%x,0x%x,0x%x\n", \
+ ipb->br_chr.cyl, \
+ ipb->br_chr.head, \
+ ipb->br_chr.record); \
+}
+
+#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \
+static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct ipl_pb0_eckd *ipb = &(_ipb); \
+ unsigned long args[3] = { 0 }; \
+ char *p, *p1, *tmp = NULL; \
+ int i, rc; \
+ \
+ if (!strncmp(buf, "auto", 4)) \
+ goto out; \
+ \
+ tmp = kstrdup(buf, GFP_KERNEL); \
+ p = tmp; \
+ for (i = 0; i < 3; i++) { \
+ p1 = strsep(&p, ", "); \
+ if (!p1) { \
+ rc = -EINVAL; \
+ goto err; \
+ } \
+ rc = kstrtoul(p1, 0, args + i); \
+ if (rc) \
+ goto err; \
+ } \
+ \
+ rc = -EINVAL; \
+ if (i != 3) \
+ goto err; \
+ \
+ if ((args[0] || args[1]) && !args[2]) \
+ goto err; \
+ \
+ if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255) \
+ goto err; \
+ \
+out: \
+ ipb->br_chr.cyl = args[0]; \
+ ipb->br_chr.head = args[1]; \
+ ipb->br_chr.record = args[2]; \
+ rc = len; \
+err: \
+ kfree(tmp); \
+ return rc; \
+}
+
+IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd);
+static struct kobj_attribute sys_ipl_eckd_br_chr_attr =
+ __ATTR(br_chr, 0644, eckd_ipl_br_chr_show, NULL);
+
+IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd);
+
+static struct kobj_attribute sys_reipl_eckd_br_chr_attr =
+ __ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store);
+
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
char loadparm[LOADPARM_LEN + 1] = {};
if (!sclp_ipl_info.is_valid)
- return sprintf(page, "#unknown#\n");
+ return sysfs_emit(page, "#unknown#\n");
memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
strim(loadparm);
- return sprintf(page, "%s\n", loadparm);
+ return sysfs_emit(page, "%s\n", loadparm);
}
static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
static struct attribute *ipl_fcp_attrs[] = {
- &sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_fcp_wwpn_attr.attr,
&sys_ipl_fcp_lun_attr.attr,
&sys_ipl_fcp_bootprog_attr.attr,
&sys_ipl_fcp_br_lba_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
- &sys_ipl_secure_attr.attr,
- &sys_ipl_has_secure_attr.attr,
NULL,
};
-static struct attribute_group ipl_fcp_attr_group = {
+static const struct attribute_group ipl_fcp_attr_group = {
.attrs = ipl_fcp_attrs,
- .bin_attrs = ipl_fcp_bin_attrs,
+ .bin_attrs_new = ipl_fcp_bin_attrs,
};
static struct attribute *ipl_nvme_attrs[] = {
- &sys_ipl_type_attr.attr,
&sys_ipl_nvme_fid_attr.attr,
&sys_ipl_nvme_nsid_attr.attr,
&sys_ipl_nvme_bootprog_attr.attr,
&sys_ipl_nvme_br_lba_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
- &sys_ipl_secure_attr.attr,
- &sys_ipl_has_secure_attr.attr,
NULL,
};
-static struct attribute_group ipl_nvme_attr_group = {
+static const struct attribute_group ipl_nvme_attr_group = {
.attrs = ipl_nvme_attrs,
- .bin_attrs = ipl_nvme_bin_attrs,
+ .bin_attrs_new = ipl_nvme_bin_attrs,
};
+static struct attribute *ipl_eckd_attrs[] = {
+ &sys_ipl_eckd_bootprog_attr.attr,
+ &sys_ipl_eckd_br_chr_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_device_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group ipl_eckd_attr_group = {
+ .attrs = ipl_eckd_attrs,
+ .bin_attrs_new = ipl_eckd_bin_attrs,
+};
/* CCW ipl device attributes */
static struct attribute *ipl_ccw_attrs_vm[] = {
- &sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
&sys_ipl_vm_parm_attr.attr,
- &sys_ipl_secure_attr.attr,
- &sys_ipl_has_secure_attr.attr,
NULL,
};
static struct attribute *ipl_ccw_attrs_lpar[] = {
- &sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
- &sys_ipl_secure_attr.attr,
- &sys_ipl_has_secure_attr.attr,
NULL,
};
-static struct attribute_group ipl_ccw_attr_group_vm = {
+static const struct attribute_group ipl_ccw_attr_group_vm = {
.attrs = ipl_ccw_attrs_vm,
};
-static struct attribute_group ipl_ccw_attr_group_lpar = {
+static const struct attribute_group ipl_ccw_attr_group_lpar = {
.attrs = ipl_ccw_attrs_lpar
};
-/* UNKNOWN ipl device attributes */
-
-static struct attribute *ipl_unknown_attrs[] = {
+static struct attribute *ipl_common_attrs[] = {
&sys_ipl_type_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
-static struct attribute_group ipl_unknown_attr_group = {
- .attrs = ipl_unknown_attrs,
+static const struct attribute_group ipl_common_attr_group = {
+ .attrs = ipl_common_attrs,
};
static struct kset *ipl_kset;
static void __ipl_run(void *unused)
{
- __bpon();
diag308(DIAG308_LOAD_CLEAR, NULL);
}
@@ -531,15 +681,22 @@ static int __init ipl_init(void)
rc = -ENOMEM;
goto out;
}
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group);
+ if (rc)
+ goto out;
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_ccw_attr_group_vm);
else
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_ccw_attr_group_lpar);
break;
+ case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
+ break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
@@ -549,8 +706,6 @@ static int __init ipl_init(void)
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
break;
default:
- rc = sysfs_create_group(&ipl_kset->kobj,
- &ipl_unknown_attr_group);
break;
}
out:
@@ -577,7 +732,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
- return sprintf(page, "%s\n", vmparm);
+ return sysfs_emit(page, "%s\n", vmparm);
}
static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
@@ -641,54 +796,20 @@ static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
}
static struct kobj_attribute sys_reipl_nss_vmparm_attr =
- __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
- reipl_nss_vmparm_store);
+ __ATTR(parm, 0644, reipl_nss_vmparm_show,
+ reipl_nss_vmparm_store);
static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
- __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
- reipl_ccw_vmparm_store);
+ __ATTR(parm, 0644, reipl_ccw_vmparm_show,
+ reipl_ccw_vmparm_store);
/* FCP reipl device attributes */
-static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t size = reipl_block_fcp->fcp.scp_data_len;
- void *scp_data = reipl_block_fcp->fcp.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t scpdata_len = count;
- size_t padding;
-
-
- if (off)
- return -EINVAL;
-
- memcpy(reipl_block_fcp->fcp.scp_data, buf, count);
- if (scpdata_len % 8) {
- padding = 8 - (scpdata_len % 8);
- memset(reipl_block_fcp->fcp.scp_data + scpdata_len,
- 0, padding);
- scpdata_len += padding;
- }
-
- reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
- reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len;
- reipl_block_fcp->fcp.scp_data_len = scpdata_len;
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr,
+ reipl_block_fcp->fcp,
+ IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+ DIAG308_SCPDATA_SIZE);
- return count;
-}
-static struct bin_attribute sys_reipl_fcp_scp_data_attr =
- __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
- reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
-
-static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+static const struct bin_attribute *const reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
NULL,
};
@@ -719,7 +840,7 @@ static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
char buf[LOADPARM_LEN + 1];
reipl_get_ascii_loadparm(buf, ipb);
- return sprintf(page, "%s\n", buf);
+ return sysfs_emit(page, "%s\n", buf);
}
static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
@@ -750,35 +871,39 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return len;
}
-/* FCP wrapper */
-static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_fcp, page);
-}
-
-static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
- __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
- reipl_fcp_loadparm_store);
+#define DEFINE_GENERIC_LOADPARM(name) \
+static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *page) \
+{ \
+ return reipl_generic_loadparm_show(reipl_block_##name, page); \
+} \
+static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \
+} \
+static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \
+ __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \
+ reipl_##name##_loadparm_store)
+
+DEFINE_GENERIC_LOADPARM(fcp);
+DEFINE_GENERIC_LOADPARM(nvme);
+DEFINE_GENERIC_LOADPARM(ccw);
+DEFINE_GENERIC_LOADPARM(nss);
+DEFINE_GENERIC_LOADPARM(eckd);
static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_fcp_clear);
+ return sysfs_emit(page, "%u\n", reipl_fcp_clear);
}
static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t len)
{
- if (strtobool(buf, &reipl_fcp_clear) < 0)
+ if (kstrtobool(buf, &reipl_fcp_clear) < 0)
return -EINVAL;
return len;
}
@@ -793,9 +918,9 @@ static struct attribute *reipl_fcp_attrs[] = {
NULL,
};
-static struct attribute_group reipl_fcp_attr_group = {
+static const struct attribute_group reipl_fcp_attr_group = {
.attrs = reipl_fcp_attrs,
- .bin_attrs = reipl_fcp_bin_attrs,
+ .bin_attrs_new = reipl_fcp_bin_attrs,
};
static struct kobj_attribute sys_reipl_fcp_clear_attr =
@@ -803,46 +928,12 @@ static struct kobj_attribute sys_reipl_fcp_clear_attr =
/* NVME reipl device attributes */
-static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t size = reipl_block_nvme->nvme.scp_data_len;
- void *scp_data = reipl_block_nvme->nvme.scp_data;
-
- return memory_read_from_buffer(buf, count, &off, scp_data, size);
-}
-
-static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr,
- char *buf, loff_t off, size_t count)
-{
- size_t scpdata_len = count;
- size_t padding;
-
- if (off)
- return -EINVAL;
-
- memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
- if (scpdata_len % 8) {
- padding = 8 - (scpdata_len % 8);
- memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
- 0, padding);
- scpdata_len += padding;
- }
-
- reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
- reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
- reipl_block_nvme->nvme.scp_data_len = scpdata_len;
-
- return count;
-}
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr,
+ reipl_block_nvme->nvme,
+ IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+ DIAG308_SCPDATA_SIZE);
-static struct bin_attribute sys_reipl_nvme_scp_data_attr =
- __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read,
- reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
-
-static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+static const struct bin_attribute *const reipl_nvme_bin_attrs[] = {
&sys_reipl_nvme_scp_data_attr,
NULL,
};
@@ -856,24 +947,6 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
reipl_block_nvme->nvme.br_lba);
-/* nvme wrapper */
-static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_nvme, page);
-}
-
-static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
- __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show,
- reipl_nvme_loadparm_store);
-
static struct attribute *reipl_nvme_attrs[] = {
&sys_reipl_nvme_fid_attr.attr,
&sys_reipl_nvme_nsid_attr.attr,
@@ -883,22 +956,22 @@ static struct attribute *reipl_nvme_attrs[] = {
NULL,
};
-static struct attribute_group reipl_nvme_attr_group = {
+static const struct attribute_group reipl_nvme_attr_group = {
.attrs = reipl_nvme_attrs,
- .bin_attrs = reipl_nvme_bin_attrs
+ .bin_attrs_new = reipl_nvme_bin_attrs
};
static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_nvme_clear);
+ return sysfs_emit(page, "%u\n", reipl_nvme_clear);
}
static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t len)
{
- if (strtobool(buf, &reipl_nvme_clear) < 0)
+ if (kstrtobool(buf, &reipl_nvme_clear) < 0)
return -EINVAL;
return len;
}
@@ -909,49 +982,17 @@ static struct kobj_attribute sys_reipl_nvme_clear_attr =
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
-/* NSS wrapper */
-static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_nss, page);
-}
-
-static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
-}
-
-/* CCW wrapper */
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
-{
- return reipl_generic_loadparm_show(reipl_block_ccw, page);
-}
-
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t len)
-{
- return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
-}
-
-static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
- __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
- reipl_ccw_loadparm_store);
-
static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%u\n", reipl_ccw_clear);
+ return sysfs_emit(page, "%u\n", reipl_ccw_clear);
}
static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t len)
{
- if (strtobool(buf, &reipl_ccw_clear) < 0)
+ if (kstrtobool(buf, &reipl_ccw_clear) < 0)
return -EINVAL;
return len;
}
@@ -984,6 +1025,52 @@ static struct attribute_group reipl_ccw_attr_group_lpar = {
.attrs = reipl_ccw_attrs_lpar,
};
+/* ECKD reipl device attributes */
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr,
+ reipl_block_eckd->eckd,
+ IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+ DIAG308_SCPDATA_SIZE);
+
+static const struct bin_attribute *const reipl_eckd_bin_attrs[] = {
+ &sys_reipl_eckd_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n",
+ reipl_block_eckd->eckd.bootprog);
+
+static struct attribute *reipl_eckd_attrs[] = {
+ &sys_reipl_eckd_device_attr.attr,
+ &sys_reipl_eckd_bootprog_attr.attr,
+ &sys_reipl_eckd_br_chr_attr.attr,
+ &sys_reipl_eckd_loadparm_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group reipl_eckd_attr_group = {
+ .attrs = reipl_eckd_attrs,
+ .bin_attrs_new = reipl_eckd_bin_attrs
+};
+
+static ssize_t reipl_eckd_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%u\n", reipl_eckd_clear);
+}
+
+static ssize_t reipl_eckd_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (kstrtobool(buf, &reipl_eckd_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_eckd_clear_attr =
+ __ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store);
/* NSS reipl device attributes */
static void reipl_get_ascii_nss_name(char *dst,
@@ -1000,7 +1087,7 @@ static ssize_t reipl_nss_name_show(struct kobject *kobj,
char nss_name[NSS_NAME_SIZE + 1] = {};
reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
- return sprintf(page, "%s\n", nss_name);
+ return sysfs_emit(page, "%s\n", nss_name);
}
static ssize_t reipl_nss_name_store(struct kobject *kobj,
@@ -1031,12 +1118,8 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj,
}
static struct kobj_attribute sys_reipl_nss_name_attr =
- __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
- reipl_nss_name_store);
-
-static struct kobj_attribute sys_reipl_nss_loadparm_attr =
- __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
- reipl_nss_loadparm_store);
+ __ATTR(name, 0644, reipl_nss_name_show,
+ reipl_nss_name_store);
static struct attribute *reipl_nss_attrs[] = {
&sys_reipl_nss_name_attr.attr,
@@ -1052,8 +1135,8 @@ static struct attribute_group reipl_nss_attr_group = {
void set_os_info_reipl_block(void)
{
- os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
- reipl_block_actual->hdr.len);
+ os_info_entry_add_data(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block_actual->hdr.len);
}
/* reipl type */
@@ -1067,6 +1150,9 @@ static int reipl_set_type(enum ipl_type type)
case IPL_TYPE_CCW:
reipl_block_actual = reipl_block_ccw;
break;
+ case IPL_TYPE_ECKD:
+ reipl_block_actual = reipl_block_eckd;
+ break;
case IPL_TYPE_FCP:
reipl_block_actual = reipl_block_fcp;
break;
@@ -1086,7 +1172,7 @@ static int reipl_set_type(enum ipl_type type)
static ssize_t reipl_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+ return sysfs_emit(page, "%s\n", ipl_type_str(reipl_type));
}
static ssize_t reipl_type_store(struct kobject *kobj,
@@ -1097,6 +1183,8 @@ static ssize_t reipl_type_store(struct kobject *kobj,
if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_CCW);
+ else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_ECKD);
else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_FCP);
else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
@@ -1112,6 +1200,7 @@ static struct kobj_attribute reipl_type_attr =
static struct kset *reipl_kset;
static struct kset *reipl_fcp_kset;
static struct kset *reipl_nvme_kset;
+static struct kset *reipl_eckd_kset;
static void __reipl_run(void *unused)
{
@@ -1123,6 +1212,13 @@ static void __reipl_run(void *unused)
else
diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
break;
+ case IPL_TYPE_ECKD:
+ diag308(DIAG308_SET, reipl_block_eckd);
+ if (reipl_eckd_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
+ break;
case IPL_TYPE_FCP:
diag308(DIAG308_SET, reipl_block_fcp);
if (reipl_fcp_clear)
@@ -1146,6 +1242,7 @@ static void __reipl_run(void *unused)
break;
case IPL_TYPE_FCP_DUMP:
case IPL_TYPE_NVME_DUMP:
+ case IPL_TYPE_ECKD_DUMP:
break;
}
disabled_wait();
@@ -1176,7 +1273,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
/* VM PARM */
- if (MACHINE_IS_VM && ipl_block_valid &&
+ if (machine_is_vm() && ipl_block_valid &&
(ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
@@ -1190,7 +1287,7 @@ static int __init reipl_nss_init(void)
{
int rc;
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 0;
reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
@@ -1215,8 +1312,8 @@ static int __init reipl_ccw_init(void)
return -ENOMEM;
rc = sysfs_create_group(&reipl_kset->kobj,
- MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
- : &reipl_ccw_attr_group_lpar);
+ machine_is_vm() ? &reipl_ccw_attr_group_vm
+ : &reipl_ccw_attr_group_lpar);
if (rc)
return rc;
@@ -1343,6 +1440,58 @@ out1:
return rc;
}
+static int __init reipl_eckd_init(void)
+{
+ int rc;
+
+ if (!sclp.has_sipl_eckd)
+ return 0;
+
+ reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_eckd)
+ return -ENOMEM;
+
+ /* sysfs: create kset for mixing attr group and bin attrs */
+ reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_eckd_kset) {
+ free_page((unsigned long)reipl_block_eckd);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_eckd_kset->kobj,
+ &sys_reipl_eckd_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_eckd_clear = true;
+ }
+
+ if (ipl_info.type == IPL_TYPE_ECKD) {
+ memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block));
+ } else {
+ reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+ reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+ reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+ reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_ECKD;
+ return 0;
+
+out2:
+ sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group);
+out1:
+ kset_unregister(reipl_eckd_kset);
+ free_page((unsigned long)reipl_block_eckd);
+ return rc;
+}
+
static int __init reipl_type_init(void)
{
enum ipl_type reipl_type = ipl_info.type;
@@ -1364,6 +1513,9 @@ static int __init reipl_type_init(void)
} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
memcpy(reipl_block_ccw, reipl_block, size);
reipl_type = IPL_TYPE_CCW;
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) {
+ memcpy(reipl_block_eckd, reipl_block, size);
+ reipl_type = IPL_TYPE_ECKD;
}
out:
return reipl_set_type(reipl_type);
@@ -1384,6 +1536,9 @@ static int __init reipl_init(void)
rc = reipl_ccw_init();
if (rc)
return rc;
+ rc = reipl_eckd_init();
+ if (rc)
+ return rc;
rc = reipl_fcp_init();
if (rc)
return rc;
@@ -1419,6 +1574,11 @@ DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
dump_block_fcp->fcp.devno);
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_fcp, dump_block_fcp->hdr,
+ dump_block_fcp->fcp,
+ IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN,
+ DIAG308_SCPDATA_SIZE);
+
static struct attribute *dump_fcp_attrs[] = {
&sys_dump_fcp_device_attr.attr,
&sys_dump_fcp_wwpn_attr.attr,
@@ -1428,9 +1588,15 @@ static struct attribute *dump_fcp_attrs[] = {
NULL,
};
-static struct attribute_group dump_fcp_attr_group = {
+static const struct bin_attribute *const dump_fcp_bin_attrs[] = {
+ &sys_dump_fcp_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_fcp_attr_group = {
.name = IPL_FCP_STR,
.attrs = dump_fcp_attrs,
+ .bin_attrs_new = dump_fcp_bin_attrs,
};
/* NVME dump device attributes */
@@ -1443,6 +1609,11 @@ DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
dump_block_nvme->nvme.br_lba);
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_nvme, dump_block_nvme->hdr,
+ dump_block_nvme->nvme,
+ IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN,
+ DIAG308_SCPDATA_SIZE);
+
static struct attribute *dump_nvme_attrs[] = {
&sys_dump_nvme_fid_attr.attr,
&sys_dump_nvme_nsid_attr.attr,
@@ -1451,9 +1622,49 @@ static struct attribute *dump_nvme_attrs[] = {
NULL,
};
-static struct attribute_group dump_nvme_attr_group = {
+static const struct bin_attribute *const dump_nvme_bin_attrs[] = {
+ &sys_dump_nvme_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_nvme_attr_group = {
.name = IPL_NVME_STR,
.attrs = dump_nvme_attrs,
+ .bin_attrs_new = dump_nvme_bin_attrs,
+};
+
+/* ECKD dump device attributes */
+DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd);
+DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n",
+ dump_block_eckd->eckd.bootprog);
+
+IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd);
+IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd);
+
+static struct kobj_attribute sys_dump_eckd_br_chr_attr =
+ __ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store);
+
+DEFINE_IPL_ATTR_SCP_DATA_RW(dump_eckd, dump_block_eckd->hdr,
+ dump_block_eckd->eckd,
+ IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN,
+ DIAG308_SCPDATA_SIZE);
+
+static struct attribute *dump_eckd_attrs[] = {
+ &sys_dump_eckd_device_attr.attr,
+ &sys_dump_eckd_bootprog_attr.attr,
+ &sys_dump_eckd_br_chr_attr.attr,
+ NULL,
+};
+
+static const struct bin_attribute *const dump_eckd_bin_attrs[] = {
+ &sys_dump_eckd_scp_data_attr,
+ NULL,
+};
+
+static const struct attribute_group dump_eckd_attr_group = {
+ .name = IPL_ECKD_STR,
+ .attrs = dump_eckd_attrs,
+ .bin_attrs_new = dump_eckd_bin_attrs,
};
/* CCW dump device attributes */
@@ -1482,7 +1693,7 @@ static int dump_set_type(enum dump_type type)
static ssize_t dump_type_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", dump_type_str(dump_type));
+ return sysfs_emit(page, "%s\n", dump_type_str(dump_type));
}
static ssize_t dump_type_store(struct kobject *kobj,
@@ -1495,6 +1706,8 @@ static ssize_t dump_type_store(struct kobject *kobj,
rc = dump_set_type(DUMP_TYPE_NONE);
else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
rc = dump_set_type(DUMP_TYPE_CCW);
+ else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_ECKD);
else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
rc = dump_set_type(DUMP_TYPE_FCP);
else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
@@ -1505,6 +1718,24 @@ static ssize_t dump_type_store(struct kobject *kobj,
static struct kobj_attribute dump_type_attr =
__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+static ssize_t dump_area_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sysfs_emit(page, "%lu\n", sclp.hsa_size);
+}
+
+static struct kobj_attribute dump_area_size_attr = __ATTR_RO(dump_area_size);
+
+static struct attribute *dump_attrs[] = {
+ &dump_type_attr.attr,
+ &dump_area_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_attr_group = {
+ .attrs = dump_attrs,
+};
+
static struct kset *dump_kset;
static void diag308_dump(void *dump_block)
@@ -1523,6 +1754,9 @@ static void __dump_run(void *unused)
case DUMP_TYPE_CCW:
diag308_dump(dump_block_ccw);
break;
+ case DUMP_TYPE_ECKD:
+ diag308_dump(dump_block_eckd);
+ break;
case DUMP_TYPE_FCP:
diag308_dump(dump_block_fcp);
break;
@@ -1601,13 +1835,36 @@ static int __init dump_nvme_init(void)
}
dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
- dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
- dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+ dump_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ dump_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ dump_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_DUMP;
dump_capabilities |= DUMP_TYPE_NVME;
return 0;
}
+static int __init dump_eckd_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd)
+ return 0; /* LDIPL DUMP is not installed */
+ dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_eckd)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_eckd);
+ return rc;
+ }
+ dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN;
+ dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN;
+ dump_block_eckd->eckd.pbt = IPL_PBT_ECKD;
+ dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_ECKD;
+ return 0;
+}
+
static int __init dump_init(void)
{
int rc;
@@ -1615,7 +1872,7 @@ static int __init dump_init(void)
dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
if (!dump_kset)
return -ENOMEM;
- rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_attr_group);
if (rc) {
kset_unregister(dump_kset);
return rc;
@@ -1623,6 +1880,9 @@ static int __init dump_init(void)
rc = dump_ccw_init();
if (rc)
return rc;
+ rc = dump_eckd_init();
+ if (rc)
+ return rc;
rc = dump_fcp_init();
if (rc)
return rc;
@@ -1641,13 +1901,28 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
- unsigned long ipib = (unsigned long) reipl_block_actual;
+ struct lowcore *abs_lc;
unsigned int csum;
- csum = (__force unsigned int)
- csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
- put_abs_lowcore(ipib, ipib);
- put_abs_lowcore(ipib_checksum, csum);
+ /*
+ * Set the REIPL_CLEAR flag in the os_info flags entry to indicate
+ * that the 'clear' sysfs attribute has been set on the panicked
+ * system for the specified reipl type.
+ * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+ */
+ if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+ (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+ (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+ (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+ reipl_type == IPL_TYPE_NSS ||
+ reipl_type == IPL_TYPE_UNKNOWN)
+ os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+ os_info_entry_add_data(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
+ csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ abs_lc = get_abs_lowcore();
+ abs_lc->ipib = __pa(reipl_block_actual);
+ abs_lc->ipib_checksum = csum;
+ put_abs_lowcore(abs_lc);
dump_run(trigger);
}
@@ -1660,11 +1935,13 @@ static struct shutdown_action __refdata dump_reipl_action = {
* vmcmd shutdown action: Trigger vm command on shutdown.
*/
-static char vmcmd_on_reboot[128];
-static char vmcmd_on_panic[128];
-static char vmcmd_on_halt[128];
-static char vmcmd_on_poff[128];
-static char vmcmd_on_restart[128];
+#define VMCMD_MAX_SIZE 240
+
+static char vmcmd_on_reboot[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_panic[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_halt[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_poff[VMCMD_MAX_SIZE + 1];
+static char vmcmd_on_restart[VMCMD_MAX_SIZE + 1];
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
@@ -1711,7 +1988,7 @@ static void vmcmd_run(struct shutdown_trigger *trigger)
static int vmcmd_init(void)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return -EOPNOTSUPP;
vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
if (!vmcmd_kset)
@@ -1776,7 +2053,7 @@ static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
static ssize_t on_reboot_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_reboot_trigger.action->name);
}
static ssize_t on_reboot_store(struct kobject *kobj,
@@ -1802,7 +2079,7 @@ static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
static ssize_t on_panic_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_panic_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_panic_trigger.action->name);
}
static ssize_t on_panic_store(struct kobject *kobj,
@@ -1828,7 +2105,7 @@ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
static ssize_t on_restart_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_restart_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_restart_trigger.action->name);
}
static ssize_t on_restart_store(struct kobject *kobj,
@@ -1854,7 +2131,7 @@ void do_restart(void *arg)
tracing_off();
debug_locks_off();
lgr_info_log();
- smp_call_online_cpu(__do_restart, arg);
+ smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */
@@ -1864,7 +2141,7 @@ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
static ssize_t on_halt_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_halt_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_halt_trigger.action->name);
}
static ssize_t on_halt_store(struct kobject *kobj,
@@ -1890,7 +2167,7 @@ static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
static ssize_t on_poff_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- return sprintf(page, "%s\n", on_poff_trigger.action->name);
+ return sysfs_emit(page, "%s\n", on_poff_trigger.action->name);
}
static ssize_t on_poff_store(struct kobject *kobj,
@@ -1972,26 +2249,28 @@ static int __init s390_ipl_init(void)
__initcall(s390_ipl_init);
-static void __init strncpy_skip_quote(char *dst, char *src, int n)
+static void __init strscpy_skip_quote(char *dst, char *src, int n)
{
int sx, dx;
- dx = 0;
- for (sx = 0; src[sx] != 0; sx++) {
+ if (!n)
+ return;
+ for (sx = 0, dx = 0; src[sx]; sx++) {
if (src[sx] == '"')
continue;
- dst[dx++] = src[sx];
- if (dx >= n)
+ dst[dx] = src[sx];
+ if (dx + 1 == n)
break;
+ dx++;
}
+ dst[dx] = '\0';
}
static int __init vmcmd_on_reboot_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_reboot, str, 127);
- vmcmd_on_reboot[127] = 0;
+ strscpy_skip_quote(vmcmd_on_reboot, str, sizeof(vmcmd_on_reboot));
on_reboot_trigger.action = &vmcmd_action;
return 1;
}
@@ -1999,10 +2278,9 @@ __setup("vmreboot=", vmcmd_on_reboot_setup);
static int __init vmcmd_on_panic_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_panic, str, 127);
- vmcmd_on_panic[127] = 0;
+ strscpy_skip_quote(vmcmd_on_panic, str, sizeof(vmcmd_on_panic));
on_panic_trigger.action = &vmcmd_action;
return 1;
}
@@ -2010,10 +2288,9 @@ __setup("vmpanic=", vmcmd_on_panic_setup);
static int __init vmcmd_on_halt_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_halt, str, 127);
- vmcmd_on_halt[127] = 0;
+ strscpy_skip_quote(vmcmd_on_halt, str, sizeof(vmcmd_on_halt));
on_halt_trigger.action = &vmcmd_action;
return 1;
}
@@ -2021,10 +2298,9 @@ __setup("vmhalt=", vmcmd_on_halt_setup);
static int __init vmcmd_on_poff_setup(char *str)
{
- if (!MACHINE_IS_VM)
+ if (!machine_is_vm())
return 1;
- strncpy_skip_quote(vmcmd_on_poff, str, 127);
- vmcmd_on_poff[127] = 0;
+ strscpy_skip_quote(vmcmd_on_poff, str, sizeof(vmcmd_on_poff));
on_poff_trigger.action = &vmcmd_action;
return 1;
}
@@ -2052,6 +2328,11 @@ void __init setup_ipl(void)
ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid;
ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno;
break;
+ case IPL_TYPE_ECKD:
+ case IPL_TYPE_ECKD_DUMP:
+ ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid;
+ ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno;
+ break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
ipl_info.data.fcp.dev_id.ssid = 0;
@@ -2078,7 +2359,7 @@ void s390_reset_system(void)
set_prefix(0);
/* Disable lowcore protection */
- __ctl_clear_bit(0, 28);
+ local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
diag_amode31_ops.diag308_reset();
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 45393919fe61..bdf9c7cb5685 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -9,6 +9,7 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
@@ -25,10 +26,13 @@
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/stacktrace.h>
#include <asm/softirq_stack.h>
+#include <asm/vtime.h>
+#include <asm/asm.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -75,13 +79,13 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQEXT_WTI, .name = "WTI", .desc = "[EXT] Warning Track"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
{.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
{.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
{.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
- {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
@@ -99,8 +103,8 @@ static const struct irq_class irqclass_sub_desc[] = {
static void do_IRQ(struct pt_regs *regs, int irq)
{
- if (tod_after_eq(S390_lowcore.int_clock,
- S390_lowcore.clock_comparator))
+ if (tod_after_eq(get_lowcore()->int_clock,
+ get_lowcore()->clock_comparator))
/* Serve timer interrupts first. */
clock_comparator_work();
generic_handle_irq(irq);
@@ -110,7 +114,7 @@ static int on_async_stack(void)
{
unsigned long frame = current_frame_address();
- return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+ return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
}
static void do_irq_async(struct pt_regs *regs, int irq)
@@ -118,7 +122,7 @@ static void do_irq_async(struct pt_regs *regs, int irq)
if (on_async_stack()) {
do_IRQ(regs, irq);
} else {
- call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+ call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ,
struct pt_regs *, regs, int, irq);
}
}
@@ -127,36 +131,41 @@ static int irq_pending(struct pt_regs *regs)
{
int cc;
- asm volatile("tpi 0\n"
- "ipm %0" : "=d" (cc) : : "cc");
- return cc >> 28;
+ asm volatile(
+ " tpi 0\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ :
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
void noinstr do_io_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
struct pt_regs *old_regs = set_irq_regs(regs);
- int from_idle;
+ bool from_idle;
irq_enter_rcu();
if (user_mode(regs)) {
update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
+ if (cpu_has_bear())
current->thread.last_break = regs->last_break;
}
- from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
+ set_cpu_flag(CIF_NOHZ_DELAY);
do {
- regs->tpi_info = S390_lowcore.tpi_info;
- if (S390_lowcore.tpi_info.adapter_IO)
+ regs->tpi_info = get_lowcore()->tpi_info;
+ if (get_lowcore()->tpi_info.adapter_IO)
do_irq_async(regs, THIN_INTERRUPT);
else
do_irq_async(regs, IO_INTERRUPT);
- } while (MACHINE_IS_LPAR && irq_pending(regs));
+ } while (machine_is_lpar() && irq_pending(regs));
irq_exit_rcu();
@@ -171,21 +180,21 @@ void noinstr do_ext_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
struct pt_regs *old_regs = set_irq_regs(regs);
- int from_idle;
+ bool from_idle;
irq_enter_rcu();
if (user_mode(regs)) {
update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
+ if (cpu_has_bear())
current->thread.last_break = regs->last_break;
}
- regs->int_code = S390_lowcore.ext_int_code_addr;
- regs->int_parm = S390_lowcore.ext_params;
- regs->int_parm_long = S390_lowcore.ext_params2;
+ regs->int_code = get_lowcore()->ext_int_code_addr;
+ regs->int_parm = get_lowcore()->ext_params;
+ regs->int_parm_long = get_lowcore()->ext_params2;
- from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
+ from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
account_idle_time_irq();
@@ -250,7 +259,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
goto out;
}
- if (index < nr_irqs) {
+ if (index < irq_get_nr_irqs()) {
show_msi_interrupt(p, index);
goto out;
}
@@ -385,7 +394,7 @@ void irq_subclass_register(enum irq_subclass subclass)
{
spin_lock(&irq_subclass_lock);
if (!irq_subclass_refcount[subclass])
- ctl_set_bit(0, subclass);
+ system_ctl_set_bit(0, subclass);
irq_subclass_refcount[subclass]++;
spin_unlock(&irq_subclass_lock);
}
@@ -396,7 +405,7 @@ void irq_subclass_unregister(enum irq_subclass subclass)
spin_lock(&irq_subclass_lock);
irq_subclass_refcount[subclass]--;
if (!irq_subclass_refcount[subclass])
- ctl_clear_bit(0, subclass);
+ system_ctl_clear_bit(0, subclass);
spin_unlock(&irq_subclass_lock);
}
EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 9da6fa30c447..4d364de43799 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -40,8 +40,10 @@ static int kexec_file_add_kernel_elf(struct kimage *image,
buf.bufsz = phdr->p_filesz;
buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+#ifdef CONFIG_CRASH_DUMP
if (image->type == KEXEC_TYPE_CRASH)
buf.mem += crashk_res.start;
+#endif
buf.memsz = phdr->p_memsz;
data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index af23eff5774d..a32ce8bea745 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -24,8 +24,10 @@ static int kexec_file_add_kernel_image(struct kimage *image,
buf.bufsz = image->kernel_buf_len;
buf.mem = 0;
+#ifdef CONFIG_CRASH_DUMP
if (image->type == KEXEC_TYPE_CRASH)
buf.mem += crashk_res.start;
+#endif
buf.memsz = buf.bufsz;
data->kernel_buf = image->kernel_buf;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 0032bdbe8e3f..c450120b4474 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,11 +9,11 @@
#define pr_fmt(fmt) "kprobes: " fmt
-#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/stop_machine.h>
+#include <linux/cpufeature.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
#include <linux/extable.h>
@@ -21,6 +21,8 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
+#include <linux/execmem.h>
+#include <asm/text-patching.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -31,41 +33,17 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
-DEFINE_INSN_CACHE_OPS(s390_insn);
-
-static int insn_page_in_use;
-
void *alloc_insn_page(void)
{
void *page;
- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
- __set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X);
+ set_memory_rox((unsigned long)page, 1);
return page;
}
-static void *alloc_s390_insn_page(void)
-{
- if (xchg(&insn_page_in_use, 1) == 1)
- return NULL;
- return &kprobes_insn_page;
-}
-
-static void free_s390_insn_page(void *page)
-{
- xchg(&insn_page_in_use, 0);
-}
-
-struct kprobe_insn_cache kprobe_s390_insn_slots = {
- .mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex),
- .alloc = alloc_s390_insn_page,
- .free = free_s390_insn_page,
- .pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages),
- .insn_size = MAX_INSN_SIZE,
-};
-
static void copy_instruction(struct kprobe *p)
{
kprobe_opcode_t insn[MAX_INSN_SIZE];
@@ -79,10 +57,10 @@ static void copy_instruction(struct kprobe *p)
if (probe_is_insn_relative_long(&insn[0])) {
/*
* For pc-relative instructions in RIL-b or RIL-c format patch
- * the RI2 displacement field. We have already made sure that
- * the insn slot for the patched instruction is within the same
- * 2GB area as the original instruction (either kernel image or
- * module area). Therefore the new displacement will always fit.
+ * the RI2 displacement field. The insn slot for the to be
+ * patched instruction is within the same 4GB area like the
+ * original instruction. Therefore the new displacement will
+ * always fit.
*/
disp = *(s32 *)&insn[1];
addr = (u64)(unsigned long)p->addr;
@@ -94,34 +72,6 @@ static void copy_instruction(struct kprobe *p)
}
NOKPROBE_SYMBOL(copy_instruction);
-static int s390_get_insn_slot(struct kprobe *p)
-{
- /*
- * Get an insn slot that is within the same 2GB area like the original
- * instruction. That way instructions with a 32bit signed displacement
- * field can be patched and executed within the insn slot.
- */
- p->ainsn.insn = NULL;
- if (is_kernel((unsigned long)p->addr))
- p->ainsn.insn = get_s390_insn_slot();
- else if (is_module_addr(p->addr))
- p->ainsn.insn = get_insn_slot();
- return p->ainsn.insn ? 0 : -ENOMEM;
-}
-NOKPROBE_SYMBOL(s390_get_insn_slot);
-
-static void s390_free_insn_slot(struct kprobe *p)
-{
- if (!p->ainsn.insn)
- return;
- if (is_kernel((unsigned long)p->addr))
- free_s390_insn_slot(p->ainsn.insn, 0);
- else
- free_insn_slot(p->ainsn.insn, 0);
- p->ainsn.insn = NULL;
-}
-NOKPROBE_SYMBOL(s390_free_insn_slot);
-
/* Check if paddr is at an instruction boundary */
static bool can_probe(unsigned long paddr)
{
@@ -175,7 +125,8 @@ int arch_prepare_kprobe(struct kprobe *p)
/* Make sure the probe isn't going on a difficult instruction */
if (probe_is_prohibited_opcode(p->addr))
return -EINVAL;
- if (s390_get_insn_slot(p))
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
return -ENOMEM;
copy_instruction(p);
return 0;
@@ -203,7 +154,12 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (cpu_has_seq_insn()) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
@@ -211,13 +167,21 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
- stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ if (cpu_has_seq_insn()) {
+ swap_instruction(&args);
+ text_poke_sync();
+ } else {
+ stop_machine_cpuslocked(swap_instruction, &args, NULL);
+ }
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
void arch_remove_kprobe(struct kprobe *p)
{
- s390_free_insn_slot(p);
+ if (!p->ainsn.insn)
+ return;
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
}
NOKPROBE_SYMBOL(arch_remove_kprobe);
@@ -225,20 +189,27 @@ static void enable_singlestep(struct kprobe_ctlblk *kcb,
struct pt_regs *regs,
unsigned long ip)
{
- struct per_regs per_kprobe;
+ union {
+ struct ctlreg regs[3];
+ struct {
+ struct ctlreg control;
+ struct ctlreg start;
+ struct ctlreg end;
+ };
+ } per_kprobe;
/* Set up the PER control registers %cr9-%cr11 */
- per_kprobe.control = PER_EVENT_IFETCH;
- per_kprobe.start = ip;
- per_kprobe.end = ip;
+ per_kprobe.control.val = PER_EVENT_IFETCH;
+ per_kprobe.start.val = ip;
+ per_kprobe.end.val = ip;
/* Save control regs and psw mask */
- __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ __local_ctl_store(9, 11, kcb->kprobe_saved_ctl);
kcb->kprobe_saved_imask = regs->psw.mask &
(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
/* Set PER control regs, turns on single step for the given address */
- __ctl_load(per_kprobe, 9, 11);
+ __local_ctl_load(9, 11, per_kprobe.regs);
regs->psw.mask |= PSW_MASK_PER;
regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
regs->psw.addr = ip;
@@ -250,7 +221,7 @@ static void disable_singlestep(struct kprobe_ctlblk *kcb,
unsigned long ip)
{
/* Restore control regs and psw mask, set new psw address */
- __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+ __local_ctl_load(9, 11, kcb->kprobe_saved_ctl);
regs->psw.mask &= ~PSW_MASK_PER;
regs->psw.mask |= kcb->kprobe_saved_imask;
regs->psw.addr = ip;
@@ -279,19 +250,10 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb)
{
__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->prev_kprobe.kp = NULL;
}
NOKPROBE_SYMBOL(pop_kprobe);
-void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
-{
- ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14];
- ri->fp = (void *)regs->gprs[15];
-
- /* Replace the return addr with trampoline addr */
- regs->gprs[14] = (unsigned long)&__kretprobe_trampoline;
-}
-NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-
static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
switch (kcb->kprobe_status) {
@@ -372,26 +334,6 @@ static int kprobe_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kprobe_handler);
-void arch_kretprobe_fixup_return(struct pt_regs *regs,
- kprobe_opcode_t *correct_ret_addr)
-{
- /* Replace fake return address with real one. */
- regs->gprs[14] = (unsigned long)correct_ret_addr;
-}
-NOKPROBE_SYMBOL(arch_kretprobe_fixup_return);
-
-/*
- * Called from __kretprobe_trampoline
- */
-void trampoline_probe_handler(struct pt_regs *regs)
-{
- kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]);
-}
-NOKPROBE_SYMBOL(trampoline_probe_handler);
-
-/* assembler function that handles the kretprobes must not be probed itself */
-NOKPROBE_SYMBOL(__kretprobe_trampoline);
-
/*
* Called after single-stepping. p->addr is the address of the
* instruction whose first byte has been replaced by the "breakpoint"
@@ -433,12 +375,11 @@ static int post_kprobe_handler(struct pt_regs *regs)
if (!p)
return 0;
+ resume_execution(p, regs);
if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
-
- resume_execution(p, regs);
pop_kprobe(kcb);
preempt_enable_no_resched();
@@ -549,6 +490,12 @@ int __init arch_init_kprobes(void)
return 0;
}
+int __init arch_populate_kprobe_blacklist(void)
+{
+ return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
+}
+
int arch_trampoline_kprobe(struct kprobe *p)
{
return 0;
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
deleted file mode 100644
index f6cb022ef8c8..000000000000
--- a/arch/s390/kernel/kprobes_insn_page.S
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <linux/linkage.h>
-
-/*
- * insn_page is a special 4k aligned dummy function for kprobes.
- * It will contain all kprobed instructions that are out-of-line executed.
- * The page must be within the kernel image to guarantee that the
- * out-of-line instructions are within 2GB distance of their original
- * location. Using a dummy function ensures that the insn_page is within
- * the text section of the kernel and mapped read-only/executable from
- * the beginning on, thus avoiding to split large mappings if the page
- * would be in the data section instead.
- */
- .section .kprobes.text, "ax"
- .align 4096
-ENTRY(kprobes_insn_page)
- .rept 2048
- .word 0x07fe
- .endr
-ENDPROC(kprobes_insn_page)
- .previous
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6652e54cf3db..6d1ffca5f798 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -166,7 +166,7 @@ static struct timer_list lgr_timer;
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
+ mod_timer(&lgr_timer, jiffies + secs_to_jiffies(LGR_TIMER_INTERVAL_SECS));
}
/*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ab761c008f98..baeb3dcfc1c8 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -13,7 +13,12 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
+#include <linux/cpufeature.h>
+#include <asm/guarded_storage.h>
+#include <asm/machine.h>
+#include <asm/pfault.h>
#include <asm/cio.h>
+#include <asm/fpu.h>
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/ipl.h>
@@ -21,15 +26,15 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
-#include <asm/switch_to.h>
#include <asm/nmi.h>
#include <asm/sclp.h>
-typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long,
- unsigned long);
+typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long);
+typedef int (*purgatory_t)(int);
extern const unsigned char relocate_kernel[];
extern const unsigned long long relocate_kernel_len;
@@ -40,13 +45,16 @@ extern const unsigned long long relocate_kernel_len;
* Reset the system, copy boot CPU registers to absolute zero,
* and jump to the kdump image
*/
-static void __do_machine_kdump(void *image)
+static void __do_machine_kdump(void *data)
{
- int (*start_kdump)(int);
+ struct kimage *image = data;
+ purgatory_t purgatory;
unsigned long prefix;
+ purgatory = (purgatory_t)image->start;
+
/* store_status() saved the prefix register to lowcore */
- prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+ prefix = (unsigned long)get_lowcore()->prefixreg_save_area;
/* Now do the reset */
s390_reset_system();
@@ -56,14 +64,12 @@ static void __do_machine_kdump(void *image)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
- memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
- (void *)(prefix + __LC_FPREGS_SAVE_AREA), 512);
+ memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
+ phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
- __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
- start_kdump = (void *)((struct kimage *) image)->start;
- start_kdump(1);
+ call_nodat(1, int, purgatory, int, 1);
- /* Die if start_kdump returns */
+ /* Die if kdump returns */
disabled_wait();
}
@@ -87,16 +93,16 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
- mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
- if (MACHINE_HAS_VX)
+ mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
+ if (cpu_has_vx())
save_vx_regs((__vector128 *) mcesa->vector_save_area);
- if (MACHINE_HAS_GS) {
- __ctl_store(cr2_old.val, 2, 2);
+ if (cpu_has_gs()) {
+ local_ctl_store(2, &cr2_old.reg);
cr2_new = cr2_old;
cr2_new.gse = 1;
- __ctl_load(cr2_new.val, 2, 2);
+ local_ctl_load(2, &cr2_new.reg);
save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
- __ctl_load(cr2_old.val, 2, 2);
+ local_ctl_load(2, &cr2_old.reg);
}
/*
* To create a good backchain for this CPU in the dump store_status
@@ -110,18 +116,6 @@ static noinline void __machine_kdump(void *image)
store_status(__do_machine_kdump, image);
}
-static unsigned long do_start_kdump(unsigned long addr)
-{
- struct kimage *image = (struct kimage *) addr;
- int (*start_kdump)(int) = (void *)image->start;
- int rc;
-
- __arch_local_irq_stnsm(0xfb); /* disable DAT */
- rc = start_kdump(0);
- __arch_local_irq_stosm(0x04); /* enable DAT */
- return rc;
-}
-
#endif /* CONFIG_CRASH_DUMP */
/*
@@ -130,12 +124,10 @@ static unsigned long do_start_kdump(unsigned long addr)
static bool kdump_csum_valid(struct kimage *image)
{
#ifdef CONFIG_CRASH_DUMP
+ purgatory_t purgatory = (purgatory_t)image->start;
int rc;
- preempt_disable();
- rc = call_on_stack(1, S390_lowcore.nodat_stack, unsigned long, do_start_kdump,
- unsigned long, (unsigned long)image);
- preempt_enable();
+ rc = call_nodat(1, int, purgatory, int, 0);
return rc == 0;
#else
return false;
@@ -188,7 +180,7 @@ void arch_kexec_unprotect_crashkres(void)
static int machine_kexec_prepare_kdump(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
diag10_range(PFN_DOWN(crashk_res.start),
PFN_DOWN(crashk_res.end - crashk_res.start + 1));
return 0;
@@ -209,7 +201,7 @@ int machine_kexec_prepare(struct kimage *image)
return -EINVAL;
/* Get the destination where the assembler code should be copied to.*/
- reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+ reboot_code_buffer = page_to_virt(image->control_code_page);
/* Then copy it */
memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
@@ -220,17 +212,6 @@ void machine_kexec_cleanup(struct kimage *image)
{
}
-void arch_crash_save_vmcoreinfo(void)
-{
- VMCOREINFO_SYMBOL(lowcore_ptr);
- VMCOREINFO_SYMBOL(high_memory);
- VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
- vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
- vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
- vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
- put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
-}
-
void machine_shutdown(void)
{
}
@@ -245,19 +226,20 @@ void machine_crash_shutdown(struct pt_regs *regs)
*/
static void __do_machine_kexec(void *data)
{
- unsigned long diag308_subcode;
- relocate_kernel_t data_mover;
+ unsigned long data_mover, entry, diag308_subcode;
struct kimage *image = data;
- s390_reset_system();
- data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
-
- __arch_local_irq_stnsm(0xfb); /* disable DAT - avoid no-execute */
- /* Call the moving routine */
+ data_mover = page_to_phys(image->control_code_page);
+ entry = virt_to_phys(&image->head);
diag308_subcode = DIAG308_CLEAR_RESET;
if (sclp.has_iplcc)
diag308_subcode |= DIAG308_FLAG_EI;
- (*data_mover)(&image->head, image->start, diag308_subcode);
+ s390_reset_system();
+
+ call_nodat(3, void, (relocate_kernel_t)data_mover,
+ unsigned long, entry,
+ unsigned long, image->start,
+ unsigned long, diag308_subcode);
/* Die if kexec returns */
disabled_wait();
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fc6d5f58debe..c2bac14dd668 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -105,6 +105,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
if (ret)
return ret;
+#ifdef CONFIG_CRASH_DUMP
if (image->type == KEXEC_TYPE_CRASH) {
u64 crash_size;
@@ -121,6 +122,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
sizeof(crash_size),
false);
}
+#endif
return ret;
}
@@ -134,8 +136,10 @@ static int kexec_file_add_purgatory(struct kimage *image,
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
if (image->type == KEXEC_TYPE_CRASH)
buf.mem += crashk_res.start;
+#endif
ret = kexec_load_purgatory(image, &buf);
if (ret)
@@ -158,8 +162,10 @@ static int kexec_file_add_initrd(struct kimage *image,
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
if (image->type == KEXEC_TYPE_CRASH)
buf.mem += crashk_res.start;
+#endif
buf.memsz = buf.bufsz;
data->parm->initrd_start = data->memsz;
@@ -187,10 +193,8 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
- if (image->type == KEXEC_TYPE_CRASH)
- buf.mem += crashk_res.start;
- ptr = (void *)ipl_cert_list_addr;
+ ptr = __va(ipl_cert_list_addr);
end = ptr + ipl_cert_list_size;
ncerts = 0;
while (ptr < end) {
@@ -202,7 +206,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
addr = data->memsz + data->report->size;
addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
- ptr = (void *)ipl_cert_list_addr;
+ ptr = __va(ipl_cert_list_addr);
while (ptr < end) {
len = *(unsigned int *)ptr;
ptr += sizeof(len);
@@ -225,6 +229,11 @@ static int kexec_file_add_ipl_report(struct kimage *image,
data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
*lc_ipl_parmblock_ptr = (__u32)buf.mem;
+#ifdef CONFIG_CRASH_DUMP
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+#endif
+
ret = kexec_add_buffer(&buf);
out:
return ret;
@@ -267,10 +276,12 @@ void *kexec_file_add_components(struct kimage *image,
memcpy(data.parm->command_line, image->cmdline_buf,
image->cmdline_buf_len);
+#ifdef CONFIG_CRASH_DUMP
if (image->type == KEXEC_TYPE_CRASH) {
data.parm->oldmem_base = crashk_res.start;
data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
}
+#endif
if (image->initrd_buf) {
ret = kexec_file_add_initrd(image, &data);
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 4786bfe02144..1fec370fecf4 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -9,15 +9,21 @@
#include <asm/ftrace.h>
#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
+#include <asm/march.h>
+#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+
+#define STACK_FRAME_SIZE_FREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE)
+#define STACK_FREGS (STACK_FRAME_OVERHEAD)
+#define STACK_FREGS_PTREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS)
+#define STACK_FREGS_PTREGS_GPRS (STACK_FREGS_PTREGS + __PT_GPRS)
+#define STACK_FREGS_PTREGS_PSW (STACK_FREGS_PTREGS + __PT_PSW)
+#define STACK_FREGS_PTREGS_ORIG_GPR2 (STACK_FREGS_PTREGS + __PT_ORIG_GPR2)
+#define STACK_FREGS_PTREGS_FLAGS (STACK_FREGS_PTREGS + __PT_FLAGS)
-#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
-#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
-#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
-#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2)
-#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS)
/* packed stack: allocate just enough for r14, r15 and backchain */
#define TRACED_FUNC_FRAME_SIZE 24
@@ -28,9 +34,14 @@
.section .kprobes.text, "ax"
-ENTRY(ftrace_stub)
+SYM_FUNC_START(ftrace_stub)
BR_EX %r14
-ENDPROC(ftrace_stub)
+SYM_FUNC_END(ftrace_stub)
+
+SYM_CODE_START(ftrace_stub_direct_tramp)
+ lgr %r1, %r0
+ BR_EX %r1
+SYM_CODE_END(ftrace_stub_direct_tramp)
.macro ftrace_regs_entry, allregs=0
stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
@@ -48,23 +59,23 @@ ENDPROC(ftrace_stub)
stg %r1,__SF_BACKCHAIN(%r15)
stg %r0,(__SF_GPRS+8*8)(%r15)
stg %r15,(__SF_GPRS+9*8)(%r15)
- # allocate pt_regs and stack frame for ftrace_trace_function
- aghi %r15,-STACK_FRAME_SIZE
- stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
- xc STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15)
+ # allocate ftrace_regs and stack frame for ftrace_trace_function
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ xc STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
.if \allregs == 1
- stg %r14,(STACK_PTREGS_PSW)(%r15)
- mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+ stg %r14,(STACK_FREGS_PTREGS_PSW)(%r15)
+ mvghi STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
.else
- xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15)
+ xc STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15)
.endif
lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address
aghi %r1,-TRACED_FUNC_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
- stg %r0,(STACK_PTREGS_PSW+8)(%r15)
- stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+ stg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+ stmg %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
.endm
SYM_CODE_START(ftrace_regs_caller)
@@ -78,7 +89,7 @@ SYM_CODE_START(ftrace_caller)
SYM_CODE_END(ftrace_caller)
SYM_CODE_START(ftrace_common)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+#ifdef MARCH_HAS_Z196_FEATURES
aghik %r2,%r0,-MCOUNT_INSN_SIZE
lgrl %r4,function_trace_op
lgrl %r1,ftrace_func
@@ -91,30 +102,19 @@ SYM_CODE_START(ftrace_common)
lg %r1,0(%r1)
#endif
lgr %r3,%r14
- la %r5,STACK_PTREGS(%r15)
+ la %r5,STACK_FREGS(%r15)
BASR_EX %r14,%r1
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-# The j instruction gets runtime patched to a nop instruction.
-# See ftrace_enable_ftrace_graph_caller.
-SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
- j .Lftrace_graph_caller_end
- lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15)
- lg %r4,(STACK_PTREGS_PSW+8)(%r15)
- brasl %r14,prepare_ftrace_return
- stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
-.Lftrace_graph_caller_end:
-#endif
- lg %r0,(STACK_PTREGS_PSW+8)(%r15)
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- ltg %r1,STACK_PTREGS_ORIG_GPR2(%r15)
+ lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+#ifdef MARCH_HAS_Z196_FEATURES
+ ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
locgrz %r1,%r0
#else
- lg %r1,STACK_PTREGS_ORIG_GPR2(%r15)
+ lg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
ltgr %r1,%r1
jnz 0f
lgr %r1,%r0
#endif
-0: lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+0: lmg %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
BR_EX %r1
SYM_CODE_END(ftrace_common)
@@ -123,10 +123,14 @@ SYM_CODE_END(ftrace_common)
SYM_FUNC_START(return_to_handler)
stmg %r2,%r5,32(%r15)
lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
+ # allocate ftrace_regs and stack frame for ftrace_return_to_handler
+ aghi %r15,-STACK_FRAME_SIZE_FREGS
stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
+ stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+ la %r2,STACK_FRAME_OVERHEAD(%r15)
brasl %r14,ftrace_return_to_handler
- aghi %r15,STACK_FRAME_OVERHEAD
+ aghi %r15,STACK_FRAME_SIZE_FREGS
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
BR_EX %r14
@@ -135,16 +139,31 @@ SYM_FUNC_END(return_to_handler)
#endif
#endif /* CONFIG_FUNCTION_TRACER */
-#ifdef CONFIG_KPROBES
-
-SYM_FUNC_START(__kretprobe_trampoline)
-
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br)
+ lmg %r0,%r1,2(%r1)
+ br %r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br)
+
+#ifdef CONFIG_EXPOLINE
+SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl)
+ lmg %r0,%r1,2(%r1)
+ exrl %r0,0f
+ j .
+0: br %r1
+SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL)
+SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)
+#endif /* CONFIG_EXPOLINE */
+
+#ifdef CONFIG_RETHOOK
+
+SYM_CODE_START(arch_rethook_trampoline)
stg %r14,(__SF_GPRS+8*8)(%r15)
- lay %r15,-STACK_FRAME_SIZE(%r15)
+ lay %r15,-STACK_FRAME_SIZE_PTREGS(%r15)
stmg %r0,%r14,STACK_PTREGS_GPRS(%r15)
# store original stack pointer in backchain and pt_regs
- lay %r7,STACK_FRAME_SIZE(%r15)
+ lay %r7,STACK_FRAME_SIZE_PTREGS(%r15)
stg %r7,__SF_BACKCHAIN(%r15)
stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15)
@@ -152,16 +171,15 @@ SYM_FUNC_START(__kretprobe_trampoline)
epsw %r2,%r3
risbg %r3,%r2,0,31,32
stg %r3,STACK_PTREGS_PSW(%r15)
- larl %r1,__kretprobe_trampoline
+ larl %r1,arch_rethook_trampoline
stg %r1,STACK_PTREGS_PSW+8(%r15)
lay %r2,STACK_PTREGS(%r15)
- brasl %r14,trampoline_probe_handler
+ brasl %r14,arch_rethook_trampoline_callback
mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15)
lmg %r0,%r15,STACK_PTREGS_GPRS(%r15)
lpswe __SF_EMPTY(%r15)
+SYM_CODE_END(arch_rethook_trampoline)
-SYM_FUNC_END(__kretprobe_trampoline)
-
-#endif /* CONFIG_KPROBES */
+#endif /* CONFIG_RETHOOK */
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 2d159b32885b..91e207b50394 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,11 +21,13 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <linux/memory.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
#include <asm/ftrace.lds.h>
#include <asm/set_memory.h>
+#include <asm/setup.h>
#if 0
#define DEBUGP printk
@@ -35,27 +37,10 @@
#define PLT_ENTRY_SIZE 22
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
- gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
- __builtin_return_address(0));
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
- return p;
-}
-
#ifdef CONFIG_FUNCTION_TRACER
void module_arch_cleanup(struct module *mod)
{
- module_memfree(mod->arch.trampolines_start);
+ execmem_free(mod->arch.trampolines_start);
}
#endif
@@ -126,6 +111,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
Elf_Rela *rela;
char *strings;
int nrela, i, j;
+ struct module_memory *mod_mem;
/* Find symbol table and string table. */
symtab = NULL;
@@ -173,14 +159,15 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
/* Increase core size by size of got & plt and set start
offsets for got and plt. */
- me->core_layout.size = ALIGN(me->core_layout.size, 4);
- me->arch.got_offset = me->core_layout.size;
- me->core_layout.size += me->arch.got_size;
- me->arch.plt_offset = me->core_layout.size;
+ mod_mem = &me->mem[MOD_TEXT];
+ mod_mem->size = ALIGN(mod_mem->size, 4);
+ me->arch.got_offset = mod_mem->size;
+ mod_mem->size += me->arch.got_size;
+ me->arch.plt_offset = mod_mem->size;
if (me->arch.plt_size) {
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
me->arch.plt_size += PLT_ENTRY_SIZE;
- me->core_layout.size += me->arch.plt_size;
+ mod_mem->size += me->arch.plt_size;
}
return 0;
}
@@ -304,7 +291,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
if (info->got_initialized == 0) {
- Elf_Addr *gotent = me->core_layout.base +
+ Elf_Addr *gotent = me->mem[MOD_TEXT].base +
me->arch.got_offset +
info->got_offset;
@@ -329,7 +316,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
- val += (Elf_Addr) me->core_layout.base - loc;
+ val += (Elf_Addr)me->mem[MOD_TEXT].base +
+ me->arch.got_offset - loc;
rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
@@ -345,7 +333,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
char *plt_base;
char *ip;
- plt_base = me->core_layout.base + me->arch.plt_offset;
+ plt_base = me->mem[MOD_TEXT].base + me->arch.plt_offset;
ip = plt_base + info->plt_offset;
*(int *)insn = 0x0d10e310; /* basr 1,0 */
*(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
@@ -375,7 +363,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val - loc + 0xffffUL < 0x1ffffeUL) ||
(r_type == R_390_PLT32DBL &&
val - loc + 0xffffffffULL < 0x1fffffffeULL)))
- val = (Elf_Addr) me->core_layout.base +
+ val = (Elf_Addr) me->mem[MOD_TEXT].base +
me->arch.plt_offset +
info->plt_offset;
val += rela->r_addend - loc;
@@ -397,7 +385,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
case R_390_GOTOFF64: /* 64 bit offset to GOT. */
val = val + rela->r_addend -
- ((Elf_Addr) me->core_layout.base + me->arch.got_offset);
+ ((Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOTOFF32)
@@ -407,7 +395,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
- val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
+ val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
rc = apply_rela_bits(loc, val, 1, 32, 0, write);
@@ -486,10 +474,10 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
- start = module_alloc(numpages * PAGE_SIZE);
+ start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if (!start)
return -ENOMEM;
- set_memory_ro((unsigned long)start, numpages);
+ set_memory_rox((unsigned long)start, numpages);
end = start + size;
me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start;
@@ -515,7 +503,7 @@ int module_finalize(const Elf_Ehdr *hdr,
!nospec_disable && me->arch.plt_size) {
unsigned int *ij;
- ij = me->core_layout.base + me->arch.plt_offset +
+ ij = me->mem[MOD_TEXT].base + me->arch.plt_offset +
me->arch.plt_size - PLT_ENTRY_SIZE;
ij[0] = 0xc6000000; /* exrl %r0,.+10 */
ij[1] = 0x0005a7f4; /* j . */
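
The module.c hunks above replace the arch-private module_alloc() with the generic execmem API and account GOT/PLT space in me->mem[MOD_TEXT] now that core_layout is gone. Below is a minimal sketch of the resulting allocate/protect/free pattern for the ftrace hotpatch trampolines; the helper names alloc_trampoline_area/free_trampoline_area are illustrative and not part of the patch (needs <linux/execmem.h> and <asm/set_memory.h>, both already included above):

/*
 * Illustrative sketch only: executable memory comes from execmem_alloc()
 * and is made read-only and executable in one step; execmem_free() is the
 * counterpart used from module_arch_cleanup().
 */
static void *alloc_trampoline_area(size_t size)
{
	unsigned long numpages = DIV_ROUND_UP(size, PAGE_SIZE);
	void *area;

	area = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
	if (!area)
		return NULL;
	set_memory_rox((unsigned long)area, numpages);
	return area;
}

static void free_trampoline_area(void *area)
{
	execmem_free(area);
}
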
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 53ed3884fe64..3da371c144eb 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -9,8 +9,10 @@
*/
#include <linux/kernel_stat.h>
+#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/entry-common.h>
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
@@ -18,20 +20,19 @@
#include <linux/time.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
-
+#include <linux/kvm_host.h>
#include <linux/export.h>
#include <asm/lowcore.h>
+#include <asm/ctlreg.h>
+#include <asm/fpu.h>
#include <asm/smp.h>
#include <asm/stp.h>
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
-#include <asm/switch_to.h>
-#include <asm/ctl_reg.h>
#include <asm/asm-offsets.h>
#include <asm/pai.h>
-
-#include <linux/kvm_host.h>
+#include <asm/vtime.h>
struct mcck_struct {
unsigned int kill_task : 1;
@@ -42,19 +43,10 @@ struct mcck_struct {
};
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
-static struct kmem_cache *mcesa_cache;
-static unsigned long mcesa_origin_lc;
static inline int nmi_needs_mcesa(void)
{
- return MACHINE_HAS_VX || MACHINE_HAS_GS;
-}
-
-static inline unsigned long nmi_get_mcesa_size(void)
-{
- if (MACHINE_HAS_GS)
- return MCESA_MAX_SIZE;
- return MCESA_MIN_SIZE;
+ return cpu_has_vx() || cpu_has_gs();
}
/*
@@ -63,47 +55,34 @@ static inline unsigned long nmi_get_mcesa_size(void)
* structure. The structure is required for machine check happening
* early in the boot process.
*/
-static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
+static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE);
void __init nmi_alloc_mcesa_early(u64 *mcesad)
{
if (!nmi_needs_mcesa())
return;
*mcesad = __pa(&boot_mcesa);
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
*mcesad |= ilog2(MCESA_MAX_SIZE);
}
-static void __init nmi_alloc_cache(void)
+int nmi_alloc_mcesa(u64 *mcesad)
{
unsigned long size;
-
- if (!nmi_needs_mcesa())
- return;
- size = nmi_get_mcesa_size();
- if (size > MCESA_MIN_SIZE)
- mcesa_origin_lc = ilog2(size);
- /* create slab cache for the machine-check-extended-save-areas */
- mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
- if (!mcesa_cache)
- panic("Couldn't create nmi save area cache");
-}
-
-int __ref nmi_alloc_mcesa(u64 *mcesad)
-{
- unsigned long origin;
+ void *origin;
*mcesad = 0;
if (!nmi_needs_mcesa())
return 0;
- if (!mcesa_cache)
- nmi_alloc_cache();
- origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
+ size = cpu_has_gs() ? MCESA_MAX_SIZE : MCESA_MIN_SIZE;
+ origin = kmalloc(size, GFP_KERNEL);
if (!origin)
return -ENOMEM;
/* The pointer is stored with mcesa_bits ORed in */
- kmemleak_not_leak((void *) origin);
- *mcesad = __pa(origin) | mcesa_origin_lc;
+ kmemleak_not_leak(origin);
+ *mcesad = __pa(origin);
+ if (cpu_has_gs())
+ *mcesad |= ilog2(MCESA_MAX_SIZE);
return 0;
}
@@ -111,12 +90,65 @@ void nmi_free_mcesa(u64 *mcesad)
{
if (!nmi_needs_mcesa())
return;
- kmem_cache_free(mcesa_cache, __va(*mcesad & MCESA_ORIGIN_MASK));
+ kfree(__va(*mcesad & MCESA_ORIGIN_MASK));
+}
+
+static __always_inline char *nmi_puts(char *dest, const char *src)
+{
+ while (*src)
+ *dest++ = *src++;
+ *dest = 0;
+ return dest;
+}
+
+static __always_inline char *u64_to_hex(char *dest, u64 val)
+{
+ int i, num;
+
+ for (i = 1; i <= 16; i++) {
+ num = (val >> (64 - 4 * i)) & 0xf;
+ if (num >= 10)
+ *dest++ = 'A' + num - 10;
+ else
+ *dest++ = '0' + num;
+ }
+ *dest = 0;
+ return dest;
}
static notrace void s390_handle_damage(void)
{
+ struct lowcore *lc = get_lowcore();
+ union ctlreg0 cr0, cr0_new;
+ char message[100];
+ psw_t psw_save;
+ char *ptr;
+
smp_emergency_stop();
+ diag_amode31_ops.diag308_reset();
+ ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
+ u64_to_hex(ptr, lc->mcck_interruption_code);
+
+ /*
+ * Disable low address protection and make machine check new PSW a
+ * disabled wait PSW. Any additional machine check cannot be handled.
+ */
+ local_ctl_store(0, &cr0.reg);
+ cr0_new = cr0;
+ cr0_new.lap = 0;
+ local_ctl_load(0, &cr0_new.reg);
+ psw_save = lc->mcck_new_psw;
+ psw_bits(lc->mcck_new_psw).io = 0;
+ psw_bits(lc->mcck_new_psw).ext = 0;
+ psw_bits(lc->mcck_new_psw).wait = 1;
+ sclp_emergency_printk(message);
+
+ /*
+ * Restore machine check new PSW and control register 0 to original
+ * values. This makes a later system dump analysis easier.
+ */
+ lc->mcck_new_psw = psw_save;
+ local_ctl_load(0, &cr0.reg);
disabled_wait();
while (1);
}
@@ -126,19 +158,20 @@ NOKPROBE_SYMBOL(s390_handle_damage);
* Main machine check handler function. Will be called with interrupts disabled
* and machine checks enabled.
*/
-void __s390_handle_mcck(void)
+void s390_handle_mcck(void)
{
struct mcck_struct mcck;
+ unsigned long mflags;
/*
* Disable machine checks and get the current state of accumulated
* machine checks. Afterwards delete the old state and enable machine
* checks again.
*/
- local_mcck_disable();
+ local_mcck_save(mflags);
mcck = *this_cpu_ptr(&cpu_mcck);
memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
- local_mcck_enable();
+ local_mcck_restore(mflags);
if (mcck.channel_report)
crw_handle_channel_report();
@@ -155,203 +188,80 @@ void __s390_handle_mcck(void)
static int mchchk_wng_posted = 0;
/* Use single cpu clear, as we cannot handle smp here. */
- __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ local_ctl_clear_bit(14, CR14_WARNING_SUBMASK_BIT);
if (xchg(&mchchk_wng_posted, 1) == 0)
kill_cad_pid(SIGPWR, 1);
}
if (mcck.stp_queue)
stp_queue_work();
if (mcck.kill_task) {
- local_irq_enable();
printk(KERN_EMERG "mcck: Terminating task because of machine "
"malfunction (code 0x%016lx).\n", mcck.mcck_code);
printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
current->comm, current->pid);
- make_task_dead(SIGSEGV);
+ if (is_global_init(current))
+ panic("mcck: Attempting to kill init!\n");
+ do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID);
}
}
-void noinstr s390_handle_mcck(struct pt_regs *regs)
-{
- trace_hardirqs_off();
- pai_kernel_enter(regs);
- __s390_handle_mcck();
- pai_kernel_exit(regs);
- trace_hardirqs_on();
-}
-/*
- * returns 0 if all required registers are available
- * returns 1 otherwise
+/**
+ * nmi_registers_valid - verify if registers are valid
+ * @mci: machine check interruption code
+ *
+ * Inspect a machine check interruption code and verify if all required
+ * registers are valid. For some registers the corresponding validity bit is
+ * ignored and the registers are set to the expected value.
+ * Returns true if all registers are valid, otherwise false.
*/
-static int notrace s390_validate_registers(union mci mci, int umode)
+static bool notrace nmi_registers_valid(union mci mci)
{
- struct mcesa *mcesa;
- void *fpt_save_area;
union ctlreg2 cr2;
- int kill_task;
- u64 zero;
-
- kill_task = 0;
- zero = 0;
- if (!mci.gr) {
- /*
- * General purpose registers couldn't be restored and have
- * unknown contents. Stop system or terminate process.
- */
- if (!umode)
- s390_handle_damage();
- kill_task = 1;
- }
- if (!mci.fp) {
- /*
- * Floating point registers can't be restored. If the
- * kernel currently uses floating point registers the
- * system is stopped. If the process has its floating
- * pointer registers loaded it is terminated.
- */
- if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
- s390_handle_damage();
- if (!test_cpu_flag(CIF_FPU))
- kill_task = 1;
- }
- fpt_save_area = &S390_lowcore.floating_pt_save_area;
- if (!mci.fc) {
- /*
- * Floating point control register can't be restored.
- * If the kernel currently uses the floating pointer
- * registers and needs the FPC register the system is
- * stopped. If the process has its floating pointer
- * registers loaded it is terminated. Otherwise the
- * FPC is just validated.
- */
- if (S390_lowcore.fpu_flags & KERNEL_FPC)
- s390_handle_damage();
- asm volatile(
- " lfpc %0\n"
- :
- : "Q" (zero));
- if (!test_cpu_flag(CIF_FPU))
- kill_task = 1;
- } else {
- asm volatile(
- " lfpc %0\n"
- :
- : "Q" (S390_lowcore.fpt_creg_save_area));
- }
-
- mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
- if (!MACHINE_HAS_VX) {
- /* Validate floating point registers */
- asm volatile(
- " ld 0,0(%0)\n"
- " ld 1,8(%0)\n"
- " ld 2,16(%0)\n"
- " ld 3,24(%0)\n"
- " ld 4,32(%0)\n"
- " ld 5,40(%0)\n"
- " ld 6,48(%0)\n"
- " ld 7,56(%0)\n"
- " ld 8,64(%0)\n"
- " ld 9,72(%0)\n"
- " ld 10,80(%0)\n"
- " ld 11,88(%0)\n"
- " ld 12,96(%0)\n"
- " ld 13,104(%0)\n"
- " ld 14,112(%0)\n"
- " ld 15,120(%0)\n"
- :
- : "a" (fpt_save_area)
- : "memory");
- } else {
- /* Validate vector registers */
- union ctlreg0 cr0;
-
- /*
- * The vector validity must only be checked if not running a
- * KVM guest. For KVM guests the machine check is forwarded by
- * KVM and it is the responsibility of the guest to take
- * appropriate actions. The host vector or FPU values have been
- * saved by KVM and will be restored by KVM.
- */
- if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
- /*
- * Vector registers can't be restored. If the kernel
- * currently uses vector registers the system is
- * stopped. If the process has its vector registers
- * loaded it is terminated. Otherwise just validate
- * the registers.
- */
- if (S390_lowcore.fpu_flags & KERNEL_VXR)
- s390_handle_damage();
- if (!test_cpu_flag(CIF_FPU))
- kill_task = 1;
- }
- cr0.val = S390_lowcore.cregs_save_area[0];
- cr0.afp = cr0.vx = 1;
- __ctl_load(cr0.val, 0, 0);
- asm volatile(
- " la 1,%0\n"
- " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
- " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
- :
- : "Q" (*(struct vx_array *)mcesa->vector_save_area)
- : "1");
- __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
- }
- /* Validate access registers */
- asm volatile(
- " lam 0,15,0(%0)\n"
- :
- : "a" (&S390_lowcore.access_regs_save_area)
- : "memory");
- if (!mci.ar) {
- /*
- * Access registers have unknown contents.
- * Terminating task.
- */
- kill_task = 1;
- }
- /* Validate guarded storage registers */
- cr2.val = S390_lowcore.cregs_save_area[2];
- if (cr2.gse) {
- if (!mci.gs) {
- /*
- * 2 cases:
- * - machine check in kernel or userspace
- * - machine check while running SIE (KVM guest)
- * For kernel or userspace the userspace values of
- * guarded storage control can not be recreated, the
- * process must be terminated.
- * For SIE the guest values of guarded storage can not
- * be recreated. This is either due to a bug or due to
- * GS being disabled in the guest. The guest will be
- * notified by KVM code and the guests machine check
- * handling must take care of this. The host values
- * are saved by KVM and are not affected.
- */
- if (!test_cpu_flag(CIF_MCCK_GUEST))
- kill_task = 1;
- } else {
- load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
- }
- }
/*
- * The getcpu vdso syscall reads CPU number from the programmable
+ * The getcpu vdso syscall reads the CPU number from the programmable
* field of the TOD clock. Disregard the TOD programmable register
- * validity bit and load the CPU number into the TOD programmable
- * field unconditionally.
+ * validity bit and load the CPU number into the TOD programmable field
+ * unconditionally.
*/
set_tod_programmable_field(raw_smp_processor_id());
- /* Validate clock comparator register */
- set_clock_comparator(S390_lowcore.clock_comparator);
-
+ /*
+ * Set the clock comparator register to the next expected value.
+ */
+ set_clock_comparator(get_lowcore()->clock_comparator);
+ if (!mci.gr || !mci.fp || !mci.fc)
+ return false;
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST))
+ return false;
+ if (!mci.ar)
+ return false;
+ /*
+ * Two cases for guarded storage registers:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of guarded storage
+ * control can not be recreated, the process must be terminated.
+ * For SIE the guest values of guarded storage can not be recreated.
+ * This is either due to a bug or due to GS being disabled in the
+ * guest. The guest will be notified by KVM code and the guest's machine
+ * check handling must take care of this. The host values are saved by
+ * KVM and are not affected.
+ */
+ cr2.reg = get_lowcore()->cregs_save_area[2];
+ if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
+ return false;
if (!mci.ms || !mci.pm || !mci.ia)
- kill_task = 1;
-
- return kill_task;
+ return false;
+ return true;
}
-NOKPROBE_SYMBOL(s390_validate_registers);
+NOKPROBE_SYMBOL(nmi_registers_valid);
/*
* Backup the guest's machine check info to its description block
@@ -362,8 +272,7 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
struct sie_page *sie_page;
/* r14 contains the sie block, which was set in sie64a */
- struct kvm_s390_sie_block *sie_block =
- (struct kvm_s390_sie_block *) regs->gprs[14];
+ struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]);
if (sie_block == NULL)
/* Something's seriously wrong, stop system. */
@@ -371,11 +280,10 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
sie_page = container_of(sie_block, struct sie_page, sie_block);
mcck_backup = &sie_page->mcck_info;
- mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ mcck_backup->mcic = get_lowcore()->mcck_interruption_code &
~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
- mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
- mcck_backup->failing_storage_address
- = S390_lowcore.failing_storage_address;
+ mcck_backup->ext_damage_code = get_lowcore()->external_damage_code;
+ mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);
@@ -390,23 +298,25 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info);
/*
* machine check handler.
*/
-int notrace s390_do_machine_check(struct pt_regs *regs)
+void notrace s390_do_machine_check(struct pt_regs *regs)
{
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
static unsigned long long last_ipd;
+ struct lowcore *lc = get_lowcore();
struct mcck_struct *mcck;
unsigned long long tmp;
+ irqentry_state_t irq_state;
union mci mci;
unsigned long mcck_dam_code;
int mcck_pending = 0;
- nmi_enter();
+ irq_state = irqentry_nmi_enter(regs);
if (user_mode(regs))
update_timer_mcck();
inc_irq_stat(NMI_NMI);
- mci.val = S390_lowcore.mcck_interruption_code;
+ mci.val = lc->mcck_interruption_code;
mcck = this_cpu_ptr(&cpu_mcck);
/*
@@ -449,7 +359,9 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
s390_handle_damage();
}
}
- if (s390_validate_registers(mci, user_mode(regs))) {
+ if (!nmi_registers_valid(mci)) {
+ if (!user_mode(regs))
+ s390_handle_damage();
/*
* Couldn't restore all register contents for the
* user space process -> mark task for termination.
@@ -472,13 +384,27 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
}
if (mci.ed && mci.ec) {
/* External damage */
- if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ if (lc->external_damage_code & (1U << ED_STP_SYNC))
mcck->stp_queue |= stp_sync_check();
- if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ if (lc->external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
mcck_pending = 1;
}
-
+ /*
+ * Reinject storage related machine checks into the guest if they
+ * happen when the guest is running.
+ */
+ if (!test_cpu_flag(CIF_MCCK_GUEST)) {
+ /* Storage error uncorrected */
+ if (mci.se)
+ s390_handle_damage();
+ /* Storage key-error uncorrected */
+ if (mci.ke)
+ s390_handle_damage();
+ /* Storage degradation */
+ if (mci.ds && mci.fa)
+ s390_handle_damage();
+ }
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
@@ -503,24 +429,18 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
}
clear_cpu_flag(CIF_MCCK_GUEST);
- if (user_mode(regs) && mcck_pending) {
- nmi_exit();
- return 1;
- }
-
if (mcck_pending)
schedule_mcck_handler();
- nmi_exit();
- return 0;
+ irqentry_nmi_exit(regs, irq_state);
}
NOKPROBE_SYMBOL(s390_do_machine_check);
static int __init machine_check_init(void)
{
- ctl_set_bit(14, 25); /* enable external damage MCH */
- ctl_set_bit(14, 27); /* enable system recovery MCH */
- ctl_set_bit(14, 24); /* enable warning MCH */
+ system_ctl_set_bit(14, CR14_EXTERNAL_DAMAGE_SUBMASK_BIT);
+ system_ctl_set_bit(14, CR14_RECOVERY_SUBMASK_BIT);
+ system_ctl_set_bit(14, CR14_WARNING_SUBMASK_BIT);
return 0;
}
early_initcall(machine_check_init);
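
The reworked s390_handle_damage() above builds its emergency message with the new nmi_puts()/u64_to_hex() helpers before printing it via sclp_emergency_printk(). A standalone userspace check of the hex formatter (not kernel code, shown only to illustrate the fixed 16-digit, upper-case, NUL-terminated output the message builder relies on):

#include <stdint.h>
#include <stdio.h>

/* Same logic as the kernel helper above, compiled for userspace. */
static char *u64_to_hex(char *dest, uint64_t val)
{
	int i, num;

	for (i = 1; i <= 16; i++) {
		num = (val >> (64 - 4 * i)) & 0xf;
		*dest++ = num >= 10 ? 'A' + num - 10 : '0' + num;
	}
	*dest = 0;
	return dest;
}

int main(void)
{
	char buf[17];

	u64_to_hex(buf, 0x123456789abcdef0ULL);
	printf("%s\n", buf);	/* prints 123456789ABCDEF0 */
	return 0;
}
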
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 717bbcc056e5..e11ec15960a1 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -4,6 +4,8 @@
#include <linux/cpu.h>
#include <asm/nospec-branch.h>
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
static int __init nobp_setup_early(char *str)
{
bool enabled;
@@ -14,14 +16,14 @@ static int __init nobp_setup_early(char *str)
return rc;
if (enabled && test_facility(82)) {
/*
- * The user explicitely requested nobp=1, enable it and
+ * The user explicitly requested nobp=1, enable it and
* disable the expoline support.
*/
- __set_facility(82, alt_stfle_fac_list);
+ nobp = 1;
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_disable = 1;
} else {
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
return 0;
}
@@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
static int __init nospec_setup_early(char *str)
{
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
return 0;
}
early_param("nospec", nospec_setup_early);
@@ -40,7 +42,7 @@ static int __init nospec_report(void)
pr_info("Spectre V2 mitigation: etokens\n");
if (nospec_uses_trampoline())
pr_info("Spectre V2 mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
pr_info("Spectre V2 mitigation: limited branch prediction\n");
return 0;
}
@@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
*/
if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
} else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
*/
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
/*
* If the kernel has not been compiled with expolines the
@@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;
@@ -114,10 +116,10 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
type = BRASL_EXPOLINE; /* brasl instruction */
else
continue;
- thunk = instr + (*(int *)(instr + 2)) * 2;
+ thunk = instr + (long)(*(int *)(instr + 2)) * 2;
if (thunk[0] == 0xc6 && thunk[1] == 0x00)
/* exrl %r0,<target-br> */
- br = thunk + (*(int *)(thunk + 2)) * 2;
+ br = thunk + (long)(*(int *)(thunk + 2)) * 2;
else
continue;
if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
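
The two new (long) casts in __nospec_revert() matter because the signed 32-bit halfword offset read from the brasl/exrl instruction is doubled before being added to a pointer; doing the doubling in 32-bit arithmetic can overflow once the target lies 2 GB or more away. A small userspace illustration of the difference (the wrapped value assumes a typical two's complement LP64 target):

#include <stdio.h>

int main(void)
{
	int rel = 0x40000000;	/* halfword offset as read from the instruction */

	/* Doubling in 32-bit arithmetic wraps around (modelled via unsigned). */
	long wrong = (int)((unsigned int)rel * 2u);
	/* Doubling in 64-bit arithmetic keeps the full byte displacement. */
	long right = (long)rel * 2;

	printf("wrong = %ld\nright = %ld\n", wrong, right);
	return 0;
}
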
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 52d4353188ad..5970dd3ee7c5 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -7,17 +7,17 @@
ssize_t cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return sysfs_emit(buf, "Mitigation: __user pointer sanitization\n");
}
ssize_t cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
{
if (test_facility(156))
- return sprintf(buf, "Mitigation: etokens\n");
+ return sysfs_emit(buf, "Mitigation: etokens\n");
if (nospec_uses_trampoline())
- return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
- return sprintf(buf, "Mitigation: limited branch prediction\n");
- return sprintf(buf, "Vulnerable\n");
+ return sysfs_emit(buf, "Mitigation: execute trampolines\n");
+ if (nobp_enabled())
+ return sysfs_emit(buf, "Mitigation: limited branch prediction\n");
+ return sysfs_emit(buf, "Vulnerable\n");
}
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
index 23ab9f02f278..2fc40f97c0ad 100644
--- a/arch/s390/kernel/numa.c
+++ b/arch/s390/kernel/numa.c
@@ -14,9 +14,6 @@
#include <linux/node.h>
#include <asm/numa.h>
-struct pglist_data *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
void __init numa_setup(void)
{
int nid;
@@ -24,12 +21,8 @@ void __init numa_setup(void)
nodes_clear(node_possible_map);
node_set(0, node_possible_map);
node_set_online(0);
- for (nid = 0; nid < MAX_NUMNODES; nid++) {
- NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
- if (!NODE_DATA(nid))
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(pg_data_t), 8);
- }
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ NODE_DATA(nid) = memblock_alloc_or_panic(sizeof(pg_data_t), 8);
NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
NODE_DATA(0)->node_id = 0;
}
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 1acc2e05d70f..c2a468986212 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,9 +13,13 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
#include <asm/asm-offsets.h>
+#include <asm/sections.h>
+#include <asm/ipl.h>
/*
* OS info structure has to be page aligned
@@ -28,7 +32,7 @@ static struct os_info os_info __page_aligned_data;
u32 os_info_csum(struct os_info *os_info)
{
int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
- return (__force u32)csum_partial(&os_info->version_major, size, 0);
+ return (__force u32)cksm(&os_info->version_major, size, 0);
}
/*
@@ -42,13 +46,24 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
}
/*
- * Add OS info entry and update checksum
+ * Add OS info data entry and update checksum
*/
-void os_info_entry_add(int nr, void *ptr, u64 size)
+void os_info_entry_add_data(int nr, void *ptr, u64 size)
{
os_info.entry[nr].addr = __pa(ptr);
os_info.entry[nr].size = size;
- os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0);
+ os_info.entry[nr].csum = (__force u32)cksm(ptr, size, 0);
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info value entry and update checksum
+ */
+void os_info_entry_add_val(int nr, u64 value)
+{
+ os_info.entry[nr].val = value;
+ os_info.entry[nr].size = 0;
+ os_info.entry[nr].csum = 0;
os_info.csum = os_info_csum(&os_info);
}
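
os_info now carries two kinds of entries: data entries added with os_info_entry_add_data() (address, size and checksum of a buffer) and plain value entries added with os_info_entry_add_val() (the value itself, with size and checksum zero). A hypothetical consumer-side helper, not part of the patch, sketching how the two kinds can be told apart when parsing the block left by a previous kernel:

/* Illustrative only: a value entry carries no checksummed payload. */
static bool os_info_entry_is_val(const struct os_info_entry *entry)
{
	return entry->size == 0 && entry->csum == 0;
}
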
@@ -57,13 +72,25 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
- void *ptr = &os_info;
+ struct lowcore *abs_lc;
+ BUILD_BUG_ON(sizeof(struct os_info) != PAGE_SIZE);
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
+ os_info_entry_add_val(OS_INFO_IDENTITY_BASE, __identity_base);
+ os_info_entry_add_val(OS_INFO_KASLR_OFFSET, kaslr_offset());
+ os_info_entry_add_val(OS_INFO_KASLR_OFF_PHYS, __kaslr_offset_phys);
+ os_info_entry_add_val(OS_INFO_VMEMMAP, (unsigned long)vmemmap);
+ os_info_entry_add_val(OS_INFO_AMODE31_START, AMODE31_START);
+ os_info_entry_add_val(OS_INFO_AMODE31_END, AMODE31_END);
+ os_info_entry_add_val(OS_INFO_IMAGE_START, (unsigned long)_stext);
+ os_info_entry_add_val(OS_INFO_IMAGE_END, (unsigned long)_end);
+ os_info_entry_add_val(OS_INFO_IMAGE_PHYS, __pa_symbol(_stext));
os_info.csum = os_info_csum(&os_info);
- put_abs_lowcore(os_info, __pa(ptr));
+ abs_lc = get_abs_lowcore();
+ abs_lc->os_info = __pa(&os_info);
+ put_abs_lowcore(abs_lc);
}
#ifdef CONFIG_CRASH_DUMP
@@ -95,7 +122,7 @@ static void os_info_old_alloc(int nr, int align)
msg = "copy failed";
goto fail_free;
}
- csum = (__force u32)csum_partial(buf_align, size, 0);
+ csum = (__force u32)cksm(buf_align, size, 0);
if (csum != os_info_old->entry[nr].csum) {
msg = "checksum failed";
goto fail_free;
@@ -122,7 +149,7 @@ static void os_info_old_init(void)
if (os_info_init)
return;
- if (!oldmem_data.start)
+ if (!oldmem_data.start && !is_ipl_type_dump())
goto fail;
if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
goto fail;
@@ -154,7 +181,7 @@ fail:
}
/*
- * Return pointer to os infor entry and its size
+ * Return pointer to os info entry and its size
*/
void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index f7dd3c849e68..6a262e198e35 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,7 +2,7 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2021
+ * Copyright IBM Corp. 2012, 2023
* Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
* Thomas Richter <tmricht@linux.ibm.com>
*/
@@ -16,14 +16,310 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>
+#include <linux/perf_event.h>
-#include <asm/cpu_mcf.h>
+#include <asm/cpu_mf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR 0xffffUL /* Max ctr for ECCTR */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */
+
+enum cpumf_ctr_set {
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ return stcctm(BASIC, range, dest);
+ case CPUMF_CTR_SET_USER:
+ return stcctm(PROBLEM_STATE, range, dest);
+ case CPUMF_CTR_SET_CRYPTO:
+ return stcctm(CRYPTO_ACTIVITY, range, dest);
+ case CPUMF_CTR_SET_EXT:
+ return stcctm(EXTENDED, range, dest);
+ case CPUMF_CTR_SET_MT_DIAG:
+ return stcctm(MT_DIAG_CLEARING, range, dest);
+ case CPUMF_CTR_SET_MAX:
+ return 3;
+ }
+ return 3;
+}
+
+struct cpu_cf_events {
+ refcount_t refcnt; /* Reference count */
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state; /* For perf_event_open SVC */
+ u64 dev_state; /* For /dev/hwctr */
+ unsigned int flags;
+ size_t used; /* Bytes used in data */
+ size_t usedss; /* Bytes used in start/stop */
+ unsigned char start[PAGE_SIZE]; /* Counter set at event add */
+ unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
+ unsigned int sets; /* # Counter set saved in memory */
+};
+
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
+/*
+ * The CPU Measurement query counter information instruction contains
+ * information which varies per machine generation, but is constant and
+ * does not change when running on a particular machine, such as counter
+ * first and second version number. This is needed to determine the size
+ * of counter sets. Extract this information at device driver initialization.
+ */
+static struct cpumf_ctr_info cpumf_ctr_info;
+
+struct cpu_cf_ptr {
+ struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via callback function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ * CPU hot plug remove can not happen. Event removal requires a close()
+ * first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}() and pmu_{start|stop}():
+ * All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+ struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+ if (p) {
+ struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+ return q->cpucf;
+ }
+ return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+ return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+ lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+ if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+ return;
+ free_percpu(cpu_cf_root.cfptr);
+ cpu_cf_root.cfptr = NULL;
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
+ __func__, refcount_read(&cpu_cf_root.refcnt),
+ !cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+ int rc = 0;
+
+ if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+ return rc;
+
+ /* The memory is already zeroed. */
+ cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+ if (cpu_cf_root.cfptr) {
+ refcount_set(&cpu_cf_root.refcnt, 1);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ return rc;
+}
+
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+
+ mutex_lock(&pmc_reserve_mutex);
+ /*
+ * When invoked via CPU hotplug handler, there might be no events
+ * installed or that particular CPU might not have an
+ * event installed. This anchor pointer can be NULL!
+ */
+ if (!cpu_cf_root.cfptr)
+ goto out;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+ /*
+ * Might be zero when called from CPU hotplug handler and no event
+ * installed on that CPU, but on different CPUs.
+ */
+ if (!cpuhw)
+ goto out;
+
+ if (refcount_dec_and_test(&cpuhw->refcnt)) {
+ kfree(cpuhw);
+ p->cpucf = NULL;
+ }
+ cpum_cf_free_root();
+out:
+ mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+ int rc;
+
+ mutex_lock(&pmc_reserve_mutex);
+ rc = cpum_cf_alloc_root();
+ if (rc)
+ goto unlock;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+
+ if (!cpuhw) {
+ cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+ if (cpuhw) {
+ p->cpucf = cpuhw;
+ refcount_set(&cpuhw->refcnt, 1);
+ } else {
+ rc = -ENOMEM;
+ }
+ } else {
+ refcount_inc(&cpuhw->refcnt);
+ }
+ if (rc) {
+ /*
+ * Error in allocation of event, decrement anchor. Since
+ * cpu_cf_event is not created, its destroy() function is not
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ cpum_cf_free_root();
+ }
+unlock:
+ mutex_unlock(&pmc_reserve_mutex);
+ return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is not -1, install the event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+ cpumask_var_t mask;
+ int rc;
+
+ if (cpu == -1) {
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ for_each_online_cpu(cpu) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (rc) {
+ for_each_cpu(cpu, mask)
+ cpum_cf_free_cpu(cpu);
+ break;
+ }
+ cpumask_set_cpu(cpu, mask);
+ }
+ free_cpumask_var(mask);
+ } else {
+ rc = cpum_cf_alloc_cpu(cpu);
+ }
+ return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+ if (cpu == -1) {
+ for_each_online_cpu(cpu)
+ cpum_cf_free_cpu(cpu);
+ } else {
+ cpum_cf_free_cpu(cpu);
+ }
+}
+
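
cpum_cf_alloc() and cpum_cf_free() replace the old global num_events counter with reference counts anchored in cpu_cf_root and in each per-CPU cpu_cf_events. A minimal sketch of the intended pairing on the perf side (the function name example_event_init is illustrative, not part of the patch): every successful allocation during event initialization is balanced by exactly one cpum_cf_free() from the event's destroy() callback.

/* Illustrative only: allocation in event init, release via destroy(). */
static int example_event_init(struct perf_event *event)
{
	/* Takes per-CPU and root references; all online CPUs when event->cpu == -1. */
	if (cpum_cf_alloc(event->cpu))
		return -ENOMEM;
	/* hw_perf_event_destroy() drops them again through cpum_cf_free(). */
	event->destroy = hw_perf_event_destroy;
	return 0;
}
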
#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
/* interval in seconds */
@@ -96,11 +392,10 @@ struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */
/* Create the trailer data at the end of a page. */
static void cfdiag_trailer(struct cf_trailer_entry *te)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
struct cpuid cpuid;
- te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */
- te->csvn = cpuhw->info.csvn;
+ te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */
+ te->csvn = cpumf_ctr_info.csvn;
get_cpu_id(&cpuid); /* Machine type */
te->mach_type = cpuid.machine;
@@ -112,6 +407,63 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
te->timestamp = get_tod_clock_fast();
}
+/*
+ * The number of counters per counter set varies between machine generations,
+ * but is constant when running on a particular machine generation.
+ * Determine each counter set size at device driver initialization and
+ * retrieve it later.
+ */
+static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX];
+static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
+{
+ size_t ctrset_size = 0;
+
+ switch (ctrset) {
+ case CPUMF_CTR_SET_BASIC:
+ if (cpumf_ctr_info.cfvn >= 1)
+ ctrset_size = 6;
+ break;
+ case CPUMF_CTR_SET_USER:
+ if (cpumf_ctr_info.cfvn == 1)
+ ctrset_size = 6;
+ else if (cpumf_ctr_info.cfvn >= 3)
+ ctrset_size = 2;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
+ ctrset_size = 16;
+ else if (cpumf_ctr_info.csvn >= 6)
+ ctrset_size = 20;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (cpumf_ctr_info.csvn == 1)
+ ctrset_size = 32;
+ else if (cpumf_ctr_info.csvn == 2)
+ ctrset_size = 48;
+ else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5)
+ ctrset_size = 128;
+ else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8)
+ ctrset_size = 160;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (cpumf_ctr_info.csvn > 3)
+ ctrset_size = 48;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ break;
+ }
+ cpumf_ctr_setsizes[ctrset] = ctrset_size;
+}
+
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset)
+{
+ return cpumf_ctr_setsizes[ctrset];
+}
+
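
The per-set sizes cached by cpum_cf_make_setsize() are what cfdiag_getctrset() below uses to work out how much buffer space a saved counter set occupies. A short sketch of that calculation (illustrative only; ctrset_space_needed is a hypothetical name, struct cf_ctrset_entry is the set header used further down):

/* Illustrative only: bytes needed for one saved counter set plus its header. */
static size_t ctrset_space_needed(enum cpumf_ctr_set set)
{
	size_t ctrset_size = cpum_cf_read_setsize(set);

	if (!ctrset_size)	/* counter set not available on this machine */
		return 0;
	return ctrset_size * sizeof(u64) + sizeof(struct cf_ctrset_entry);
}
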
/* Read a counter set. The counter set number determines the counter set and
* the CPUM-CF first and second version number determine the number of
* available counters in each counter set.
@@ -130,14 +482,13 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
size_t room, bool error_ok)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
size_t ctrset_size, need = 0;
int rc = 3; /* Assume write failure */
ctrdata->def = CF_DIAG_CTRSET_DEF;
ctrdata->set = ctrset;
ctrdata->res1 = 0;
- ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
+ ctrset_size = cpum_cf_read_setsize(ctrset);
if (ctrset_size) { /* Save data */
need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
@@ -151,10 +502,6 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
need = 0;
}
- debug_sprintf_event(cf_dbg, 3,
- "%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
- " need %zd rc %d\n", __func__, ctrset, ctrset_size,
- cpuhw->info.cfvn, cpuhw->info.csvn, need, rc);
return need;
}
@@ -213,25 +560,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
struct cf_trailer_entry *trailer_start, *trailer_stop;
struct cf_ctrset_entry *ctrstart, *ctrstop;
size_t offset = 0;
+ int i;
- auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
- do {
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+ /* Counter set not authorized */
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue;
+ /* Counter set size zero was not saved */
+ if (!cpum_cf_read_setsize(i))
+ continue;
+
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
pr_err_once("cpum_cf_diag counter set compare error "
"in set %i\n", ctrstart->set);
return 0;
}
- auth &= ~cpumf_ctr_ctl[ctrstart->set];
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
cfdiag_diffctrset((u64 *)(ctrstart + 1),
(u64 *)(ctrstop + 1), ctrstart->ctr);
offset += ctrstart->ctr * sizeof(u64) +
sizeof(*ctrstart);
}
- } while (ctrstart->def && auth);
+ }
/* Save time_stamp from start of event in stop's trailer */
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
@@ -259,40 +612,35 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
return set;
}
-static int validate_ctr_version(const struct hw_perf_event *hwc,
- enum cpumf_ctr_set set)
+static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
{
- struct cpu_cf_events *cpuhw;
- int err = 0;
u16 mtdiag_ctl;
-
- cpuhw = &get_cpu_var(cpu_cf_events);
+ int err = 0;
/* check required version for counter sets */
switch (set) {
case CPUMF_CTR_SET_BASIC:
case CPUMF_CTR_SET_USER:
- if (cpuhw->info.cfvn < 1)
+ if (cpumf_ctr_info.cfvn < 1)
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_CRYPTO:
- if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
- hwc->config > 79) ||
- (cpuhw->info.csvn >= 6 && hwc->config > 83))
+ if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 &&
+ config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83))
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_EXT:
- if (cpuhw->info.csvn < 1)
+ if (cpumf_ctr_info.csvn < 1)
err = -EOPNOTSUPP;
- if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
- (cpuhw->info.csvn == 2 && hwc->config > 175) ||
- (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
- && hwc->config > 255) ||
- (cpuhw->info.csvn >= 6 && hwc->config > 287))
+ if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
+ (cpumf_ctr_info.csvn == 2 && config > 175) ||
+ (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
+ config > 255) ||
+ (cpumf_ctr_info.csvn >= 6 && config > 287))
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_MT_DIAG:
- if (cpuhw->info.csvn <= 3)
+ if (cpumf_ctr_info.csvn <= 3)
err = -EOPNOTSUPP;
/*
* MT-diagnostic counters are read-only. The counter set
@@ -307,35 +655,15 @@ static int validate_ctr_version(const struct hw_perf_event *hwc,
* counter set is enabled and active.
*/
mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
- if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
- (cpuhw->info.enable_ctl & mtdiag_ctl) &&
- (cpuhw->info.act_ctl & mtdiag_ctl)))
+ if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
+ (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
+ (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_MAX:
err = -EOPNOTSUPP;
}
- put_cpu_var(cpu_cf_events);
- return err;
-}
-
-static int validate_ctr_auth(const struct hw_perf_event *hwc)
-{
- struct cpu_cf_events *cpuhw;
- int err = 0;
-
- cpuhw = &get_cpu_var(cpu_cf_events);
-
- /* Check authorization for cpu counter sets.
- * If the particular CPU counter set is not authorized,
- * return with -ENOENT in order to fall back to other
- * PMUs that might suffice the event request.
- */
- if (!(hwc->config_base & cpuhw->info.auth_ctl))
- err = -ENOENT;
-
- put_cpu_var(cpu_cf_events);
return err;
}
@@ -346,20 +674,17 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc)
*/
static void cpumf_pmu_enable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
+ if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
return;
err = lcctl(cpuhw->state | cpuhw->dev_state);
- if (err) {
- pr_err("Enabling the performance measuring unit "
- "failed with rc=%x\n", err);
- return;
- }
-
- cpuhw->flags |= PMU_F_ENABLED;
+ if (err)
+ pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
+ else
+ cpuhw->flags |= PMU_F_ENABLED;
}
/*
@@ -369,40 +694,26 @@ static void cpumf_pmu_enable(struct pmu *pmu)
*/
static void cpumf_pmu_disable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- int err;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
u64 inactive;
+ int err;
- if (!(cpuhw->flags & PMU_F_ENABLED))
+ if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
return;
inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
inactive |= cpuhw->dev_state;
err = lcctl(inactive);
- if (err) {
- pr_err("Disabling the performance measuring unit "
- "failed with rc=%x\n", err);
- return;
- }
-
- cpuhw->flags &= ~PMU_F_ENABLED;
+ if (err)
+ pr_err("Disabling the performance measuring unit failed with rc=%x\n", err);
+ else
+ cpuhw->flags &= ~PMU_F_ENABLED;
}
-
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
- }
+ cpum_cf_free(event->cpu);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -426,12 +737,10 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static void cpumf_hw_inuse(void)
+static int is_userspace_event(u64 ev)
{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_inc_return(&num_events) == 1)
- __kernel_cpumcf_begin();
- mutex_unlock(&pmc_reserve_mutex);
+ return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
}
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
@@ -439,7 +748,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
enum cpumf_ctr_set set;
- int err = 0;
u64 ev;
switch (type) {
@@ -456,19 +764,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
if (is_sampling_event(event)) /* No sampling support */
return -ENOENT;
ev = attr->config;
- /* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
- if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_user[ev];
-
- /* No support for kernel space counters only */
+ /*
+ * Count user space (problem-state) only
+ * Handle events 32 and 33 as 0:u and 1:u
+ */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+ }
} else if (!attr->exclude_kernel && attr->exclude_user) {
+ /* No support for kernel space counters only */
return -EOPNOTSUPP;
- } else { /* Count user and kernel space */
- if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_basic[ev];
+ } else {
+ /* Count user and kernel space, incl. events 32 + 33 */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
}
break;
@@ -505,18 +820,22 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
- /* Finally, validate version and authorization of the counter set */
- err = validate_ctr_auth(hwc);
- if (!err)
- err = validate_ctr_version(hwc, set);
-
- return err;
+ /*
+ * Finally, validate version and authorization of the counter set.
+ * If the particular CPU counter set is not authorized,
+ * return with -ENOENT in order to fall back to other
+ * PMUs that might suffice the event request.
+ */
+ if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
+ return -ENOENT;
+ return validate_ctr_version(hwc->config, set);
}
-/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
* attribute::type values:
* - PERF_TYPE_HARDWARE:
* - pmu->type:
@@ -539,18 +858,13 @@ static int cpumf_pmu_event_type(struct perf_event *event)
static int cpumf_pmu_event_init(struct perf_event *event)
{
unsigned int type = event->attr.type;
- int err;
+ int err = -ENOENT;
if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
/* Registered as unknown PMU */
err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
- else
- return -ENOENT;
-
- if (unlikely(err) && event->destroy)
- event->destroy(event);
return err;
}
@@ -560,8 +874,8 @@ static int hw_perf_event_reset(struct perf_event *event)
u64 prev, new;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err) {
if (err != 3)
@@ -573,7 +887,7 @@ static int hw_perf_event_reset(struct perf_event *event)
*/
new = 0;
}
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
return err;
}
@@ -583,12 +897,12 @@ static void hw_perf_event_update(struct perf_event *event)
u64 prev, new, delta;
int err;
+ prev = local64_read(&event->hw.prev_count);
do {
- prev = local64_read(&event->hw.prev_count);
err = ecctr(event->hw.config, &new);
if (err)
return;
- } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+ } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));
delta = (prev <= new) ? new - prev
: (-1ULL - prev) + new + 1; /* overflow */
@@ -605,7 +919,7 @@ static void cpumf_pmu_read(struct perf_event *event)
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -662,17 +976,10 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
- raw.size = raw.frag.size;
- data.raw = &raw;
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
- debug_sprintf_event(cf_dbg, 3,
- "%s event %#llx sample_type %#llx raw %d ov %d\n",
- __func__, event->hw.config,
- event->attr.sample_type, raw.size, overflow);
- if (overflow)
- event->pmu->stop(event, 0);
perf_event_update_userpage(event);
return overflow;
@@ -680,7 +987,7 @@ static int cfdiag_push_sample(struct perf_event *event,
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -707,8 +1014,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
false);
if (cfdiag_diffctr(cpuhw, event->hw.config_base))
cfdiag_push_sample(event, cpuhw);
- } else if (cpuhw->flags & PMU_F_RESERVED) {
- /* Only update when PMU not hotplugged off */
+ } else {
hw_perf_event_update(event);
}
hwc->state |= PERF_HES_UPTODATE;
@@ -717,7 +1023,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
ctr_set_enable(&cpuhw->state, event->hw.config_base);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -730,7 +1036,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int i;
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -741,7 +1047,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
*
* When a new perf event has been added but not yet started, this can
* clear enable control and resets all counters in a set. Therefore,
- * cpumf_pmu_start() always has to reenable a counter set.
+ * cpumf_pmu_start() always has to re-enable a counter set.
*/
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
if (!atomic_read(&cpuhw->ctr_set[i]))
@@ -762,31 +1068,172 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
+static struct cfset_session { /* CPUs and counter set bit mask */
+ struct list_head head; /* Head of list of active processes */
+} cfset_session = {
+ .head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
+/*
+ * Synchronize access to device /dev/hwc. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwc.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done on kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ * and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ * pmu_delete(). The event itself is removed when the file descriptor is
+ * closed.
+ */
+static int cfset_online_cpu(unsigned int cpu);
+
+static int cpum_cf_online_cpu(unsigned int cpu)
+{
+ int rc = 0;
+
+ /*
+ * Ignore notification for perf_event_open().
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (!rc)
+ cfset_online_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return rc;
+}
+
+static int cfset_offline_cpu(unsigned int cpu);
+
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+ /*
+ * During task exit processing of grouped perf events triggered by CPU
+ * hotplug processing, pmu_disable() is called as part of perf context
+ * removal process. Therefore do not trigger event removal now for
+ * perf_event_open() created events. Perf common code triggers event
+ * destruction when the event file descriptor is closed.
+ *
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ cfset_offline_cpu(cpu);
+ cpum_cf_free_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+ return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_cf_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+
+ /*
+ * Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case.
+ */
+ cpuhw = this_cpu_cfhw();
+ if (!cpuhw)
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpumf_ctr_info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
+}
+
static int cfset_init(void);
static int __init cpumf_pmu_init(void)
{
int rc;
- if (!kernel_cpumcf_avail())
+ /* Extract counter measurement facility information */
+ if (!cpum_cf_avail() || qctri(&cpumf_ctr_info))
return -ENODEV;
+ /* Determine and store counter set sizes for later reference */
+ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
+ cpum_cf_make_setsize(rc);
+
+ /*
+ * Clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters
+ */
+ system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+ return rc;
+ }
+
/* Setup s390dbf facility */
cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
if (!cf_dbg) {
pr_err("Registration of s390dbf(cpum_cf) failed\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out1;
}
debug_register_view(cf_dbg, &debug_sprintf_view);
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc) {
- debug_unregister_view(cf_dbg, &debug_sprintf_view);
- debug_unregister(cf_dbg);
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ goto out2;
} else if (stccm_avail()) { /* Setup counter set device */
cfset_init();
}
+
+ rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+ "perf/s390/cf:online",
+ cpum_cf_online_cpu, cpum_cf_offline_cpu);
+ return rc;
+
+out2:
+ debug_unregister_view(cf_dbg, &debug_sprintf_view);
+ debug_unregister(cf_dbg);
+out1:
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
return rc;
}
@@ -795,19 +1242,11 @@ static int __init cpumf_pmu_init(void)
* counter set via normal file operations.
*/
-static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */
-static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
unsigned int sets; /* Counter set bit mask */
atomic_t cpus_ack; /* # CPUs successfully executed func */
};
-static struct cfset_session { /* CPUs and counter set bit mask */
- struct list_head head; /* Head of list of active processes */
-} cfset_session = {
- .head = LIST_HEAD_INIT(cfset_session.head)
-};
-
struct cfset_request { /* CPUs and counter set bit mask */
unsigned long ctrset; /* Bit mask of counter set to read */
cpumask_t mask; /* CPU mask to read from */
@@ -869,11 +1308,11 @@ static void cfset_session_add(struct cfset_request *p)
/* Stop all counter sets via ioctl interface */
static void cfset_ioctl_off(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
- /* Check if any counter set used by /dev/hwc */
+ /* Check if any counter set used by /dev/hwctr */
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
if ((p->sets & cpumf_ctr_ctl[rc])) {
if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
@@ -890,14 +1329,12 @@ static void cfset_ioctl_off(void *parm)
cpuhw->state, S390_HWCTR_DEVICE, rc);
if (!cpuhw->dev_state)
cpuhw->flags &= ~PMU_F_IN_USE;
- debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
- __func__, rc, cpuhw->state, cpuhw->dev_state);
}
/* Start counter sets on particular CPU */
static void cfset_ioctl_on(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
@@ -913,17 +1350,13 @@ static void cfset_ioctl_on(void *parm)
else
pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n",
cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc);
- debug_sprintf_event(cf_dbg, 4, "%s rc %d state %#llx dev_state %#llx\n",
- __func__, rc, cpuhw->state, cpuhw->dev_state);
}
static void cfset_release_cpu(void *p)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int rc;
- debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
- __func__, cpuhw->state, cpuhw->dev_state);
cpuhw->dev_state = 0;
rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */
if (rc)
@@ -959,27 +1392,41 @@ static int cfset_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
}
- if (!atomic_dec_return(&cfset_opencnt))
+ if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
on_each_cpu(cfset_release_cpu, NULL, 1);
+ cpum_cf_free(-1);
+ }
mutex_unlock(&cfset_ctrset_mutex);
-
- hw_perf_event_destroy(NULL);
return 0;
}
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
static int cfset_open(struct inode *inode, struct file *file)
{
- if (!capable(CAP_SYS_ADMIN))
+ int rc = 0;
+
+ if (!perfmon_capable())
return -EPERM;
+ file->private_data = NULL;
+
mutex_lock(&cfset_ctrset_mutex);
- if (atomic_inc_return(&cfset_opencnt) == 1)
- cfset_session_init();
+ if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
+ rc = cpum_cf_alloc(-1);
+ if (!rc) {
+ cfset_session_init();
+ refcount_set(&cfset_opencnt, 1);
+ }
+ }
mutex_unlock(&cfset_ctrset_mutex);
- cpumf_hw_inuse();
- file->private_data = NULL;
/* nonseekable_open() never fails */
- return nonseekable_open(inode, file);
+ return rc ?: nonseekable_open(inode, file);
}
static int cfset_all_start(struct cfset_request *req)
@@ -998,13 +1445,11 @@ static int cfset_all_start(struct cfset_request *req)
if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
rc = -EIO;
- debug_sprintf_event(cf_dbg, 4, "%s CPUs missing", __func__);
}
free_cpumask_var(mask);
return rc;
}
-
/* Return the maximum required space for all possible CPUs in case one
* CPU will be onlined during the START, READ, STOP cycles.
* To find out the size of the counter sets, any one CPU will do. They
@@ -1012,34 +1457,32 @@ static int cfset_all_start(struct cfset_request *req)
*/
static size_t cfset_needspace(unsigned int sets)
{
- struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
size_t bytes = 0;
int i;
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
if (!(sets & cpumf_ctr_ctl[i]))
continue;
- bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+ bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
sizeof(((struct s390_ctrset_setdata *)0)->set) +
sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
}
bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
- put_cpu_ptr(&cpu_cf_events);
return bytes;
}
static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
{
struct s390_ctrset_read __user *ctrset_read;
- unsigned int cpu, cpus, rc;
+ unsigned int cpu, cpus, rc = 0;
void __user *uptr;
ctrset_read = (struct s390_ctrset_read __user *)arg;
uptr = ctrset_read->data;
for_each_cpu(cpu, mask) {
- struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+ struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
struct s390_ctrset_cpudata __user *ctrset_cpudata;
ctrset_cpudata = uptr;
@@ -1047,17 +1490,18 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
cpuhw->used);
- if (rc)
- return -EFAULT;
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
+ }
uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
cond_resched();
}
cpus = cpumask_weight(mask);
if (put_user(cpus, &ctrset_read->no_cpus))
- return -EFAULT;
- debug_sprintf_event(cf_dbg, 4, "%s copied %ld\n", __func__,
- uptr - (void __user *)ctrset_read->data);
- return 0;
+ rc = -EFAULT;
+out:
+ return rc;
}
static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
@@ -1080,7 +1524,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
/* Read all counter sets. */
static void cfset_cpu_read(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int set, set_size;
size_t space;
@@ -1097,7 +1541,7 @@ static void cfset_cpu_read(void *parm)
if (!(p->sets & cpumf_ctr_ctl[set]))
continue; /* Counter set not in list */
- set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
+ set_size = cpum_cf_read_setsize(set);
space = sizeof(cpuhw->data) - cpuhw->used;
space = cfset_cpuset_read(sp, set, set_size, space);
if (space) {
@@ -1105,8 +1549,6 @@ static void cfset_cpu_read(void *parm)
cpuhw->sets += 1;
}
}
- debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
- cpuhw->sets, cpuhw->used);
}
static int cfset_all_read(unsigned long arg, struct cfset_request *req)
@@ -1128,14 +1570,10 @@ static int cfset_all_read(unsigned long arg, struct cfset_request *req)
static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
{
- struct s390_ctrset_read read;
int ret = -ENODATA;
- if (req && req->ctrset) {
- if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
- return -EFAULT;
+ if (req && req->ctrset)
ret = cfset_all_read(arg, req);
- }
return ret;
}
@@ -1204,8 +1642,6 @@ static long cfset_ioctl_start(unsigned long arg, struct file *file)
if (!ret) {
cfset_session_add(preq);
file->private_data = preq;
- debug_sprintf_event(cf_dbg, 4, "%s set %#lx need %ld ret %d\n",
- __func__, preq->ctrset, need, ret);
} else {
kfree(preq);
}
@@ -1255,24 +1691,23 @@ static const struct file_operations cfset_fops = {
.release = cfset_release,
.unlocked_ioctl = cfset_ioctl,
.compat_ioctl = cfset_ioctl,
- .llseek = no_llseek
};
static struct miscdevice cfset_dev = {
.name = S390_HWCTR_DEVICE,
.minor = MISC_DYNAMIC_MINOR,
.fops = &cfset_fops,
+ .mode = 0666,
};
/* Hotplug add of a CPU. Scan through all active processes and add
* that CPU to the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_online_cpu(unsigned int cpu)
+static int cfset_online_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1280,19 +1715,18 @@ int cfset_online_cpu(unsigned int cpu)
cpumask_set_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
*/
-int cfset_offline_cpu(unsigned int cpu)
+static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1300,28 +1734,22 @@ int cfset_offline_cpu(unsigned int cpu)
cpumask_clear_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
static void cfdiag_read(struct perf_event *event)
{
- debug_sprintf_event(cf_dbg, 3, "%s event %#llx count %ld\n", __func__,
- event->attr.config, local64_read(&event->count));
}
static int get_authctrsets(void)
{
- struct cpu_cf_events *cpuhw;
unsigned long auth = 0;
enum cpumf_ctr_set i;
- cpuhw = &get_cpu_var(cpu_cf_events);
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+ if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i])
auth |= cpumf_ctr_ctl[i];
}
- put_cpu_var(cpu_cf_events);
return auth;
}
@@ -1355,8 +1783,6 @@ static int cfdiag_event_init2(struct perf_event *event)
if (!event->hw.config_base)
err = -EINVAL;
- debug_sprintf_event(cf_dbg, 5, "%s err %d config_base %#lx\n",
- __func__, err, event->hw.config_base);
return err;
}
@@ -1381,12 +1807,11 @@ static int cfdiag_event_init(struct perf_event *event)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
err = cfdiag_event_init2(event);
- if (unlikely(err))
- event->destroy(event);
out:
return err;
}
@@ -1429,7 +1854,7 @@ static const struct attribute_group *cfdiag_attr_groups[] = {
/* Performance monitoring unit for event CF_DIAG. Since this event
* is also started and stopped via the perf_event_open() system call, use
* the same event enable/disable call back functions. They do not
- * have a pointer to the perf_event strcture as first parameter.
+ * have a pointer to the perf_event structure as first parameter.
*
* The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common.
* Reuse them and distinguish the event (always first parameter) via
@@ -1459,7 +1884,7 @@ static size_t cfdiag_maxsize(struct cpumf_ctr_info *info)
enum cpumf_ctr_set i;
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- size_t size = cpum_cf_ctrset_size(i, info);
+ size_t size = cpum_cf_read_setsize(i);
if (size)
max_size += size * sizeof(u64) +
@@ -1493,16 +1918,12 @@ static void cfdiag_get_cpu_speed(void)
static int cfset_init(void)
{
- struct cpumf_ctr_info info;
size_t need;
int rc;
- if (qctri(&info))
- return -ENODEV;
-
cfdiag_get_cpu_speed();
/* Make sure the counter set data fits into predefined buffer. */
- need = cfdiag_maxsize(&info);
+ need = cfdiag_maxsize(&cpumf_ctr_info);
if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
need);
diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c
deleted file mode 100644
index 8ee48672233f..000000000000
--- a/arch/s390/kernel/perf_cpum_cf_common.c
+++ /dev/null
@@ -1,233 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * CPU-Measurement Counter Facility Support - Common Layer
- *
- * Copyright IBM Corp. 2019
- * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
- */
-#define KMSG_COMPONENT "cpum_cf_common"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/kernel_stat.h>
-#include <linux/percpu.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <asm/ctl_reg.h>
-#include <asm/irq.h>
-#include <asm/cpu_mcf.h>
-
-/* Per-CPU event structure for the counter facility */
-DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = {
- .ctr_set = {
- [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
- [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0),
- },
- .alert = ATOMIC64_INIT(0),
- .state = 0,
- .dev_state = 0,
- .flags = 0,
- .used = 0,
- .usedss = 0,
- .sets = 0
-};
-/* Indicator whether the CPU-Measurement Counter Facility Support is ready */
-static bool cpum_cf_initalized;
-
-/* CPU-measurement alerts for the counter facility */
-static void cpumf_measurement_alert(struct ext_code ext_code,
- unsigned int alert, unsigned long unused)
-{
- struct cpu_cf_events *cpuhw;
-
- if (!(alert & CPU_MF_INT_CF_MASK))
- return;
-
- inc_irq_stat(IRQEXT_CMC);
- cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- /* Measurement alerts are shared and might happen when the PMU
- * is not reserved. Ignore these alerts in this case. */
- if (!(cpuhw->flags & PMU_F_RESERVED))
- return;
-
- /* counter authorization change alert */
- if (alert & CPU_MF_INT_CF_CACA)
- qctri(&cpuhw->info);
-
- /* loss of counter data alert */
- if (alert & CPU_MF_INT_CF_LCDA)
- pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
-
- /* loss of MT counter data alert */
- if (alert & CPU_MF_INT_CF_MTDA)
- pr_warn("CPU[%i] MT counter data was lost\n",
- smp_processor_id());
-
- /* store alert for special handling by in-kernel users */
- atomic64_or(alert, &cpuhw->alert);
-}
-
-#define PMC_INIT 0
-#define PMC_RELEASE 1
-static void cpum_cf_setup_cpu(void *flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- switch (*((int *) flags)) {
- case PMC_INIT:
- memset(&cpuhw->info, 0, sizeof(cpuhw->info));
- qctri(&cpuhw->info);
- cpuhw->flags |= PMU_F_RESERVED;
- break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
- break;
- }
-
- /* Disable CPU counter sets */
- lcctl(0);
-}
-
-bool kernel_cpumcf_avail(void)
-{
- return cpum_cf_initalized;
-}
-EXPORT_SYMBOL(kernel_cpumcf_avail);
-
-/* Initialize the CPU-measurement counter facility */
-int __kernel_cpumcf_begin(void)
-{
- int flags = PMC_INIT;
-
- on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
-}
-EXPORT_SYMBOL(__kernel_cpumcf_begin);
-
-/* Obtain the CPU-measurement alerts for the counter facility */
-unsigned long kernel_cpumcf_alert(int clear)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- unsigned long alert;
-
- alert = atomic64_read(&cpuhw->alert);
- if (clear)
- atomic64_set(&cpuhw->alert, 0);
-
- return alert;
-}
-EXPORT_SYMBOL(kernel_cpumcf_alert);
-
-/* Release the CPU-measurement counter facility */
-void __kernel_cpumcf_end(void)
-{
- int flags = PMC_RELEASE;
-
- on_each_cpu(cpum_cf_setup_cpu, &flags, 1);
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-EXPORT_SYMBOL(__kernel_cpumcf_end);
-
-static int cpum_cf_setup(unsigned int cpu, int flags)
-{
- local_irq_disable();
- cpum_cf_setup_cpu(&flags);
- local_irq_enable();
- return 0;
-}
-
-static int cpum_cf_online_cpu(unsigned int cpu)
-{
- cpum_cf_setup(cpu, PMC_INIT);
- return cfset_online_cpu(cpu);
-}
-
-static int cpum_cf_offline_cpu(unsigned int cpu)
-{
- cfset_offline_cpu(cpu);
- return cpum_cf_setup(cpu, PMC_RELEASE);
-}
-
-/* Return the maximum possible counter set size (in number of 8 byte counters)
- * depending on type and model number.
- */
-size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
- struct cpumf_ctr_info *info)
-{
- size_t ctrset_size = 0;
-
- switch (ctrset) {
- case CPUMF_CTR_SET_BASIC:
- if (info->cfvn >= 1)
- ctrset_size = 6;
- break;
- case CPUMF_CTR_SET_USER:
- if (info->cfvn == 1)
- ctrset_size = 6;
- else if (info->cfvn >= 3)
- ctrset_size = 2;
- break;
- case CPUMF_CTR_SET_CRYPTO:
- if (info->csvn >= 1 && info->csvn <= 5)
- ctrset_size = 16;
- else if (info->csvn == 6 || info->csvn == 7)
- ctrset_size = 20;
- break;
- case CPUMF_CTR_SET_EXT:
- if (info->csvn == 1)
- ctrset_size = 32;
- else if (info->csvn == 2)
- ctrset_size = 48;
- else if (info->csvn >= 3 && info->csvn <= 5)
- ctrset_size = 128;
- else if (info->csvn == 6 || info->csvn == 7)
- ctrset_size = 160;
- break;
- case CPUMF_CTR_SET_MT_DIAG:
- if (info->csvn > 3)
- ctrset_size = 48;
- break;
- case CPUMF_CTR_SET_MAX:
- break;
- }
-
- return ctrset_size;
-}
-
-static int __init cpum_cf_init(void)
-{
- int rc;
-
- if (!cpum_cf_avail())
- return -ENODEV;
-
- /* clear bit 15 of cr0 to unauthorize problem-state to
- * extract measurement counters */
- ctl_clear_bit(0, 48);
-
- /* register handler for measurement-alert interruptions */
- rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
- cpumf_measurement_alert);
- if (rc) {
- pr_err("Registering for CPU-measurement alerts "
- "failed with rc=%i\n", rc);
- return rc;
- }
-
- rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
- "perf/s390/cf:online",
- cpum_cf_online_cpu, cpum_cf_offline_cpu);
- if (!rc)
- cpum_cf_initalized = true;
-
- return rc;
-}
-early_initcall(cpum_cf_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 0d64aafd158f..7ace1f9e4ccf 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
-
CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
@@ -291,8 +290,8 @@ CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
-CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x0108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x0109);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080);
@@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d);
CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e);
CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d);
+CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e);
+CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1);
+CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2);
+CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca);
+CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb);
+CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc);
+CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd);
+CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce);
+CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd);
+CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109);
+CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116);
+CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
@@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = {
+static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
@@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = {
NULL,
};
+static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE),
+ CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD),
+ CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD),
+ CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION),
+ CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z17, SORTL),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK),
+ CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -855,16 +1012,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
}
/* Determine version specific crypto set */
- switch (ci.csvn) {
- case 1 ... 5:
+ csvn = none;
+ if (ci.csvn >= 1 && ci.csvn <= 5)
csvn = cpumcf_svn_12345_pmu_event_attr;
- break;
- case 6 ... 7:
- csvn = cpumcf_svn_67_pmu_event_attr;
- break;
- default:
- csvn = none;
- }
+ else if (ci.csvn >= 6)
+ csvn = cpumcf_svn_678_pmu_event_attr;
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
@@ -897,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
case 0x3932:
model = cpumcf_z16_pmu_event_attr;
break;
+ case 0x9175:
+ case 0x9176:
+ model = cpumcf_z17_pmu_event_attr;
+ break;
default:
model = none;
break;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 332a49965130..91469401f2c9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -22,6 +22,23 @@
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>
+#include <linux/io.h>
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR 2
+#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_FREQ_MODE 0x0008 /* Sampling with frequency */
+
+#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc)
+#define TEAR_REG(hwc) ((hwc)->last_tag)
+#define SAMPL_RATE(hwc) ((hwc)->event_base)
+#define SAMPL_FLAGS(hwc) ((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SAMPL_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
/* Minimum number of sample-data-block-tables:
* At least one table is required for the sampling buffer structure.
@@ -42,7 +59,7 @@
#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
static inline int require_table_link(const void *sdbt)
{
- return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+ return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
}
/* Minimum and maximum sampling buffer sizes:
@@ -99,15 +116,55 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
/* Debug feature */
static debug_info_t *sfdbg;
+/* Sampling control helper functions */
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+ unsigned long freq)
+{
+ return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+ unsigned long rate)
+{
+ return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+/* Return pointer to trailer entry of a sample data block */
+static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
+{
+ void *ret;
+
+ ret = (void *)v;
+ ret += PAGE_SIZE;
+ ret -= sizeof(struct hws_trailer_entry);
+
+ return ret;
+}
+
+/*
+ * Return true if the entry in the sample data block table (sdbt)
+ * is a link to the next sdbt
+ */
+static inline int is_link_entry(unsigned long *s)
+{
+ return *s & 0x1UL ? 1 : 0;
+}
+
+/* Return pointer to the linked sdbt */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+ return phys_to_virt(*s & ~0x1UL);
+}
+
/*
* sf_disable() - Switch off sampling facility
*/
-static int sf_disable(void)
+static void sf_disable(void)
{
struct hws_lsctl_request_block sreq;
memset(&sreq, 0, sizeof(sreq));
- return lsctl(&sreq);
+ lsctl(&sreq);
}
/*
@@ -123,57 +180,44 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
*/
static void free_sampling_buffer(struct sf_buffer *sfb)
{
- unsigned long *sdbt, *curr;
-
- if (!sfb->sdbt)
- return;
+ unsigned long *sdbt, *curr, *head;
sdbt = sfb->sdbt;
- curr = sdbt;
-
+ if (!sdbt)
+ return;
+ sfb->sdbt = NULL;
/* Free the SDBT after all SDBs are processed... */
- while (1) {
- if (!*curr || !sdbt)
- break;
-
- /* Process table-link entries */
+ head = sdbt;
+ curr = sdbt;
+ do {
if (is_link_entry(curr)) {
+ /* Process table-link entries */
curr = get_next_sdbt(curr);
- if (sdbt)
- free_page((unsigned long) sdbt);
-
- /* If the origin is reached, sampling buffer is freed */
- if (curr == sfb->sdbt)
- break;
- else
- sdbt = curr;
+ free_page((unsigned long)sdbt);
+ sdbt = curr;
} else {
/* Process SDB pointer */
- if (*curr) {
- free_page(*curr);
- curr++;
- }
+ free_page((unsigned long)phys_to_virt(*curr));
+ curr++;
}
- }
-
- debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
- (unsigned long)sfb->sdbt);
+ } while (curr != head);
memset(sfb, 0, sizeof(*sfb));
}
static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
- unsigned long sdb, *trailer;
+ struct hws_trailer_entry *te;
+ unsigned long sdb;
/* Allocate and initialize sample-data-block */
sdb = get_zeroed_page(gfp_flags);
if (!sdb)
return -ENOMEM;
- trailer = trailer_entry_ptr(sdb);
- *trailer = SDB_TE_ALERT_REQ_MASK;
+ te = trailer_entry_ptr(sdb);
+ te->header.a = 1;
/* Link SDB into the sample-data-block-table */
- *sdbt = sdb;
+ *sdbt = virt_to_phys((void *)sdb);
return 0;
}
@@ -212,10 +256,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "%s: "
- "sampling buffer is not linked: origin %#lx"
- " tail %#lx\n", __func__,
- (unsigned long)sfb->sdbt,
+ debug_sprintf_event(sfdbg, 3, "%s buffer not linked origin %#lx tail %#lx\n",
+ __func__, (unsigned long)sfb->sdbt,
(unsigned long)tail);
return -EINVAL;
}
@@ -225,14 +267,14 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
for (i = 0; i < num_sdb; i++) {
/* Allocate a new SDB-table if it is full. */
if (require_table_link(tail)) {
- new = (unsigned long *) get_zeroed_page(gfp_flags);
+ new = (unsigned long *)get_zeroed_page(gfp_flags);
if (!new) {
rc = -ENOMEM;
break;
}
sfb->num_sdbt++;
/* Link current page to tail of chain */
- *tail = (unsigned long)(void *) new + 1;
+ *tail = virt_to_phys((void *)new) + 1;
tail_prev = tail;
tail = new;
}
@@ -251,7 +293,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
*/
if (tail_prev) {
sfb->num_sdbt--;
- free_page((unsigned long) new);
+ free_page((unsigned long)new);
tail = tail_prev;
}
break;
@@ -262,12 +304,9 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
}
/* Link sampling buffer to its origin */
- *tail = (unsigned long) sfb->sdbt + 1;
+ *tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
- debug_sprintf_event(sfdbg, 4, "%s: new buffer"
- " settings: sdbt %lu sdb %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -290,7 +329,7 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
return -EINVAL;
/* Allocate the sample-data-block-table origin */
- sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (!sfb->sdbt)
return -ENOMEM;
sfb->num_sdb = 0;
@@ -300,19 +339,12 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
* realloc_sampling_buffer() invocation.
*/
sfb->tail = sfb->sdbt;
- *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+ *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1;
/* Allocate requested number of sample-data-blocks */
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
- if (rc) {
+ if (rc)
free_sampling_buffer(sfb);
- debug_sprintf_event(sfdbg, 4, "%s: "
- "realloc_sampling_buffer failed with rc %i\n",
- __func__, rc);
- } else
- debug_sprintf_event(sfdbg, 4,
- "%s: tear %#lx dear %#lx\n", __func__,
- (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
return rc;
}
@@ -324,8 +356,8 @@ static void sfb_set_limits(unsigned long min, unsigned long max)
CPUM_SF_MAX_SDB = max;
memset(&si, 0, sizeof(si));
- if (!qsi(&si))
- CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+ qsi(&si);
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
}
static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
@@ -344,12 +376,6 @@ static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
return 0;
}
-static int sfb_has_pending_allocs(struct sf_buffer *sfb,
- struct hw_perf_event *hwc)
-{
- return sfb_pending_allocs(sfb, hwc) > 0;
-}
-
static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
/* Limit the number of SDBs to not exceed the maximum */
@@ -366,14 +392,13 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
{
- if (cpuhw->sfb.sdbt)
+ if (sf_buffer_available(cpuhw))
free_sampling_buffer(&cpuhw->sfb);
}
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
unsigned long n_sdb, freq;
- size_t sample_size;
/* Calculate sampling buffers using 4K pages
*
@@ -404,7 +429,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
* to 511 SDBs).
*/
- sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
@@ -420,12 +444,6 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
if (sf_buffer_available(cpuhw))
return 0;
- debug_sprintf_event(sfdbg, 3,
- "%s: rate %lu f %lu sdb %lu/%lu"
- " sample_size %lu cpuhw %p\n", __func__,
- SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
- sample_size, cpuhw);
-
return alloc_sampling_buffer(&cpuhw->sfb,
sfb_pending_allocs(&cpuhw->sfb, hwc));
}
@@ -482,8 +500,6 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
- __func__, OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -501,13 +517,11 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
static void extend_sampling_buffer(struct sf_buffer *sfb,
struct hw_perf_event *hwc)
{
- unsigned long num, num_old;
- int rc;
+ unsigned long num;
num = sfb_pending_allocs(sfb, hwc);
if (!num)
return;
- num_old = sfb->num_sdb;
/* Disable the sampling facility to reset any states and also
* clear pending measurement alerts.
@@ -519,62 +533,32 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
* called by perf. Because this is a reallocation, it is fine if the
* new SDB-request cannot be satisfied immediately.
*/
- rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
- if (rc)
- debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
- __func__, rc);
-
- if (sfb_has_pending_allocs(sfb, hwc))
- debug_sprintf_event(sfdbg, 5, "%s: "
- "req %lu alloc %lu remaining %lu\n",
- __func__, num, sfb->num_sdb - num_old,
- sfb_pending_allocs(sfb, hwc));
+ realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
}
/* Number of perf events counting hardware events */
-static atomic_t num_events;
+static refcount_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
#define PMC_INIT 0
#define PMC_RELEASE 1
-#define PMC_FAILURE 2
static void setup_pmc_cpu(void *flags)
{
- int err;
- struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
- err = 0;
- switch (*((int *) flags)) {
+ sf_disable();
+ switch (*((int *)flags)) {
case PMC_INIT:
- memset(cpusf, 0, sizeof(*cpusf));
- err = qsi(&cpusf->qsi);
- if (err)
- break;
- cpusf->flags |= PMU_F_RESERVED;
- err = sf_disable();
- if (err)
- pr_err("Switching off the sampling facility failed "
- "with rc %i\n", err);
- debug_sprintf_event(sfdbg, 5,
- "%s: initialized: cpuhw %p\n", __func__,
- cpusf);
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ qsi(&cpuhw->qsi);
+ cpuhw->flags |= PMU_F_RESERVED;
break;
case PMC_RELEASE:
- cpusf->flags &= ~PMU_F_RESERVED;
- err = sf_disable();
- if (err) {
- pr_err("Switching off the sampling facility failed "
- "with rc %i\n", err);
- } else
- deallocate_buffers(cpusf);
- debug_sprintf_event(sfdbg, 5,
- "%s: released: cpuhw %p\n", __func__,
- cpusf);
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ deallocate_buffers(cpuhw);
break;
}
- if (err)
- *((int *) flags) |= PMC_FAILURE;
}
static void release_pmc_hardware(void)
@@ -585,27 +569,19 @@ static void release_pmc_hardware(void)
on_each_cpu(setup_pmc_cpu, &flags, 1);
}
-static int reserve_pmc_hardware(void)
+static void reserve_pmc_hardware(void)
{
int flags = PMC_INIT;
on_each_cpu(setup_pmc_cpu, &flags, 1);
- if (flags & PMC_FAILURE) {
- release_pmc_hardware();
- return -ENODEV;
- }
irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
}
static void hw_perf_event_destroy(struct perf_event *event)
{
/* Release PMC if this is the last perf event */
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- release_pmc_hardware();
+ if (refcount_dec_and_mutex_lock(&num_events, &pmc_reserve_mutex)) {
+ release_pmc_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -671,7 +647,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
/* Protect callchain buffers, tasks */
rcu_read_lock();
- perf_prepare_sample(&header, data, event, regs);
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);
if (perf_output_begin(&handle, data, event, header.size))
goto out;
@@ -708,9 +685,6 @@ static unsigned long getrate(bool freq, unsigned long sample,
*/
if (sample_rate_to_freq(si, rate) >
sysctl_perf_event_sample_rate) {
- debug_sprintf_event(sfdbg, 1, "%s: "
- "Sampling rate exceeds maximum "
- "perf sample rate\n", __func__);
rate = 0;
}
}
@@ -755,9 +729,6 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
attr->sample_period = rate;
SAMPL_RATE(hwc) = rate;
hw_init_period(hwc, SAMPL_RATE(hwc));
- debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
- __func__, event->cpu, event->attr.sample_period,
- event->attr.freq, SAMPLE_FREQ_MODE(hwc));
return 0;
}
@@ -767,23 +738,16 @@ static int __hw_perf_event_init(struct perf_event *event)
struct hws_qsi_info_block si;
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
- int cpu, err;
+ int cpu, err = 0;
/* Reserve CPU-measurement sampling facility */
- err = 0;
- if (!atomic_inc_not_zero(&num_events)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
- err = -EBUSY;
- else
- atomic_inc(&num_events);
- mutex_unlock(&pmc_reserve_mutex);
+ mutex_lock(&pmc_reserve_mutex);
+ if (!refcount_inc_not_zero(&num_events)) {
+ reserve_pmc_hardware();
+ refcount_set(&num_events, 1);
}
event->destroy = hw_perf_event_destroy;
- if (err)
- goto out;
-
/* Access per-CPU sampling information (query sampling info) */
/*
* The event->cpu value can be -1 to count on every CPU, for example,
@@ -795,9 +759,9 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
memset(&si, 0, sizeof(si));
cpuhw = NULL;
- if (event->cpu == -1)
+ if (event->cpu == -1) {
qsi(&si);
- else {
+ } else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
*/
@@ -834,21 +798,13 @@ static int __hw_perf_event_init(struct perf_event *event)
SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
}
- /* Check and set other sampling flags */
- if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
- SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
-
err = __hw_perf_event_init_rate(event, &si);
if (err)
goto out;
- /* Initialize sample data overflow accounting */
- hwc->extra_reg.reg = REG_OVERFLOW;
- OVERFLOW_REG(hwc) = 0;
-
/* Use AUX buffer. No need to allocate it by ourself */
if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
- return 0;
+ goto out;
/* Allocate the per-CPU sampling buffer using the CPU information
* from the event. If the event is not pinned to a particular
@@ -878,6 +834,7 @@ static int __hw_perf_event_init(struct perf_event *event)
if (is_default_overflow_handler(event))
event->overflow_handler = cpumsf_output_event_pid;
out:
+ mutex_unlock(&pmc_reserve_mutex);
return err;
}
@@ -919,10 +876,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
return -ENOENT;
}
- /* Check online status of the CPU to which the event is pinned */
- if (event->cpu >= 0 && !cpu_online(event->cpu))
- return -ENODEV;
-
/* Force reset of idle/hv excludes regardless of what the
* user requested.
*/
@@ -932,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
event->attr.exclude_idle = 0;
err = __hw_perf_event_init(event);
- if (unlikely(err))
- if (event->destroy)
- event->destroy(event);
return err;
}
@@ -944,10 +894,14 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
struct hw_perf_event *hwc;
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
- return;
-
- if (cpuhw->flags & PMU_F_ERR_MASK)
+ /*
+ * Event must be
+ * - added/started on this CPU (PMU_F_IN_USE set)
+ * - and CPU must be available (PMU_F_RESERVED set)
+ * - and not already enabled (PMU_F_ENABLED not set)
+ * - and not in error condition (PMU_F_ERR_MASK not set)
+ */
+ if (cpuhw->flags != (PMU_F_IN_USE | PMU_F_RESERVED))
return;
/* Check whether to extent the sampling buffer.
@@ -961,40 +915,27 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
* facility, but it can be fully re-enabled using sampling controls that
* have been saved in cpumsf_pmu_disable().
*/
- if (cpuhw->event) {
- hwc = &cpuhw->event->hw;
- if (!(SAMPL_DIAG_MODE(hwc))) {
- /*
- * Account number of overflow-designated
- * buffer extents
- */
- sfb_account_overflows(cpuhw, hwc);
- extend_sampling_buffer(&cpuhw->sfb, hwc);
- }
- /* Rate may be adjusted with ioctl() */
- cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
+ hwc = &cpuhw->event->hw;
+ if (!(SAMPL_DIAG_MODE(hwc))) {
+ /*
+ * Account number of overflow-designated buffer extents
+ */
+ sfb_account_overflows(cpuhw, hwc);
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
}
+ /* Rate may be adjusted with ioctl() */
+ cpuhw->lsctl.interval = SAMPL_RATE(hwc);
/* (Re)enable the PMU and sampling facility */
- cpuhw->flags |= PMU_F_ENABLED;
- barrier();
-
err = lsctl(&cpuhw->lsctl);
if (err) {
- cpuhw->flags &= ~PMU_F_ENABLED;
- pr_err("Loading sampling controls failed: op %i err %i\n",
- 1, err);
+ pr_err("Loading sampling controls failed: op 1 err %i\n", err);
return;
}
/* Load current program parameter */
- lpp(&S390_lowcore.lpp);
-
- debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
- "interval %#lx tear %#lx dear %#lx\n", __func__,
- cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
- cpuhw->lsctl.cd, cpuhw->lsctl.interval,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+ lpp(&get_lowcore()->lpp);
+ cpuhw->flags |= PMU_F_ENABLED;
}
static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1017,31 +958,27 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
err = lsctl(&inactive);
if (err) {
- pr_err("Loading sampling controls failed: op %i err %i\n",
- 2, err);
+ pr_err("Loading sampling controls failed: op 2 err %i\n", err);
return;
}
- /* Save state of TEAR and DEAR register contents */
- err = qsi(&si);
- if (!err) {
- /* TEAR/DEAR values are valid only if the sampling facility is
- * enabled. Note that cpumsf_pmu_disable() might be called even
- * for a disabled sampling facility because cpumsf_pmu_enable()
- * controls the enable/disable state.
- */
- if (si.es) {
- cpuhw->lsctl.tear = si.tear;
- cpuhw->lsctl.dear = si.dear;
- }
- } else
- debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
- __func__, err);
+ /*
+ * Save state of TEAR and DEAR register contents.
+ * TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ qsi(&si);
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
cpuhw->flags &= ~PMU_F_ENABLED;
}
-/* perf_exclude_event() - Filter event
+/* perf_event_exclude() - Filter event
* @event: The perf event
* @regs: pt_regs structure
* @sde_regs: Sample-data-entry (sde) regs structure
@@ -1050,7 +987,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
*
* Return non-zero if the event shall be excluded.
*/
-static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
struct perf_sf_sde_regs *sde_regs)
{
if (event->attr.exclude_user && user_mode(regs))
@@ -1133,12 +1070,9 @@ static int perf_push_sample(struct perf_event *event,
data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
overflow = 0;
- if (perf_exclude_event(event, &regs, sde_regs))
+ if (perf_event_exclude(event, &regs, sde_regs))
goto out;
- if (perf_event_overflow(event, &data, &regs)) {
- overflow = 1;
- event->pmu->stop(event, 0);
- }
+ overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
out:
return overflow;
@@ -1175,9 +1109,9 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
struct hws_trailer_entry *te;
struct hws_basic_entry *sample;
- te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
- sample = (struct hws_basic_entry *) *sdbt;
- while ((unsigned long *) sample < (unsigned long *) te) {
+ te = trailer_entry_ptr((unsigned long)sdbt);
+ sample = (struct hws_basic_entry *)sdbt;
+ while ((unsigned long *)sample < (unsigned long *)te) {
/* Check for an empty sample */
if (!sample->def || sample->LS)
break;
@@ -1202,11 +1136,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sprintf_event(sfdbg, 4,
- "%s: Found unknown"
- " sampling data entry: te->f %i"
- " basic.def %#4x (%p)\n", __func__,
- te->f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1217,7 +1146,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
* that are not full. Stop processing if the first
* invalid format was detected.
*/
- if (!te->f)
+ if (!te->header.f)
break;
}
@@ -1235,71 +1164,62 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
* The sampling buffer position are retrieved and saved in the TEAR_REG
* register of the specified perf event.
*
- * Only full sample-data-blocks are processed. Specify the flash_all flag
- * to also walk through partially filled sample-data-blocks. It is ignored
- * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag
- * enforces the processing of full sample-data-blocks only (trailer entries
- * with the block-full-indicator bit set).
+ * Only full sample-data-blocks are processed. Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks.
*/
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
+ unsigned long long event_overflow, sampl_overflow, num_sdb;
struct hw_perf_event *hwc = &event->hw;
+ union hws_trailer_header prev, new;
struct hws_trailer_entry *te;
- unsigned long *sdbt;
- unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ unsigned long *sdbt, sdb;
int done;
/*
* AUX buffer is used when in diagnostic sampling mode.
* No perf events/samples are created.
*/
- if (SAMPL_DIAG_MODE(&event->hw))
+ if (SAMPL_DIAG_MODE(hwc))
return;
- if (flush_all && SDB_FULL_BLOCKS(hwc))
- flush_all = 0;
-
- sdbt = (unsigned long *) TEAR_REG(hwc);
+ sdbt = (unsigned long *)TEAR_REG(hwc);
done = event_overflow = sampl_overflow = num_sdb = 0;
while (!done) {
/* Get the trailer entry of the sample-data-block */
- te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sdb = (unsigned long)phys_to_virt(*sdbt);
+ te = trailer_entry_ptr(sdb);
/* Leave loop if no more work to do (block full indicator) */
- if (!te->f) {
+ if (!te->header.f) {
done = 1;
if (!flush_all)
break;
}
/* Check the sample overflow count */
- if (te->overflow)
+ if (te->header.overflow)
/* Account sample overflows and, if a particular limit
* is reached, extend the sampling buffer.
* For details, see sfb_account_overflows().
*/
- sampl_overflow += te->overflow;
-
- /* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
- "overflow %llu timestamp %#llx\n",
- __func__, (unsigned long)sdbt, te->overflow,
- (te->f) ? trailer_timestamp(te) : 0ULL);
+ sampl_overflow += te->header.overflow;
/* Collect all samples from a single sample-data-block and
* flag if an (perf) event overflow happened. If so, the PMU
* is stopped and remaining samples will be discarded.
*/
- hw_collect_samples(event, sdbt, &event_overflow);
+ hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
num_sdb++;
/* Reset trailer (using compare-double-and-swap) */
+ prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
- te_flags |= SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- te->flags, te->overflow,
- te_flags, 0ULL));
+ new.val = prev.val;
+ new.f = 0;
+ new.a = 1;
+ new.overflow = 0;
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
/* Advance to next sample-data-block */
sdbt++;
@@ -1307,7 +1227,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sdbt = get_next_sdbt(sdbt);
/* Update event hardware registers */
- TEAR_REG(hwc) = (unsigned long) sdbt;
+ TEAR_REG(hwc) = (unsigned long)sdbt;
/* Stop processing sample-data if all samples of the current
* sample-data-block were flushed even if it was not full.
@@ -1329,25 +1249,30 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
* are dropped.
* Slightly increase the interval to avoid hitting this limit.
*/
- if (event_overflow) {
+ if (event_overflow)
SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
- debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
- __func__,
- DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
- }
+}
+
+static inline unsigned long aux_sdb_index(struct aux_buffer *aux,
+ unsigned long i)
+{
+ return i % aux->sfb.num_sdb;
+}
+
+static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end)
+{
+ return end >= start ? end - start + 1 : 0;
+}
- if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "%s: "
- "overflows: sample %llu event %llu"
- " total %llu num_sdb %llu\n",
- __func__, sampl_overflow, event_overflow,
- OVERFLOW_REG(hwc), num_sdb);
+static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->alert_mark);
}
-#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
-#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0)
-#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark)
-#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark)
+static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux)
+{
+ return aux_sdb_num(aux->head, aux->empty_mark);
+}
/*
* Get trailer entry by index of SDB.
@@ -1357,9 +1282,9 @@ static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
{
unsigned long sdb;
- index = AUX_SDB_INDEX(aux, index);
+ index = aux_sdb_index(aux, index);
sdb = aux->sdb_index[index];
- return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ return trailer_entry_ptr(sdb);
}
/*
@@ -1381,10 +1306,10 @@ static void aux_output_end(struct perf_output_handle *handle)
if (!aux)
return;
- range_scan = AUX_SDB_NUM_ALERT(aux);
+ range_scan = aux_sdb_num_alert(aux);
for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
+ if (!te->header.f)
break;
}
/* i is num of SDBs which are full */
@@ -1392,10 +1317,7 @@ static void aux_output_end(struct perf_output_handle *handle)
/* Remove alert indicators in the buffer */
te = aux_sdb_trailer(aux, aux->alert_mark);
- te->flags &= ~SDB_TE_ALERT_REQ_MASK;
-
- debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
- __func__, i, range_scan, aux->head);
+ te->header.a = 0;
}
/*
@@ -1411,12 +1333,10 @@ static int aux_output_begin(struct perf_output_handle *handle,
struct aux_buffer *aux,
struct cpu_hw_sf *cpuhw)
{
- unsigned long range;
- unsigned long i, range_scan, idx;
- unsigned long head, base, offset;
+ unsigned long range, i, range_scan, idx, head, base, offset;
struct hws_trailer_entry *te;
- if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
+ if (handle->head & ~PAGE_MASK)
return -EINVAL;
aux->head = handle->head >> PAGE_SHIFT;
@@ -1428,18 +1348,14 @@ static int aux_output_begin(struct perf_output_handle *handle,
* SDBs between aux->head and aux->empty_mark are already ready
* for new data. range_scan is num of SDBs not within them.
*/
- debug_sprintf_event(sfdbg, 6,
- "%s: range %ld head %ld alert %ld empty %ld\n",
- __func__, range, aux->head, aux->alert_mark,
- aux->empty_mark);
- if (range > AUX_SDB_NUM_EMPTY(aux)) {
- range_scan = range - AUX_SDB_NUM_EMPTY(aux);
+ if (range > aux_sdb_num_empty(aux)) {
+ range_scan = range - aux_sdb_num_empty(aux);
idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
- SDB_TE_ALERT_REQ_MASK);
- te->overflow = 0;
+ te->header.f = 0;
+ te->header.a = 0;
+ te->header.overflow = 0;
}
/* Save the position of empty SDBs */
aux->empty_mark = aux->head + range - 1;
@@ -1448,20 +1364,14 @@ static int aux_output_begin(struct perf_output_handle *handle,
/* Set alert indicator */
aux->alert_mark = aux->head + range/2 - 1;
te = aux_sdb_trailer(aux, aux->alert_mark);
- te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
+ te->header.a = 1;
/* Reset hardware buffer head */
- head = AUX_SDB_INDEX(aux, aux->head);
+ head = aux_sdb_index(aux, aux->head);
base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
offset = head % CPUM_SF_SDB_PER_TABLE;
- cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
- cpuhw->lsctl.dear = aux->sdb_index[head];
-
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
- "index %ld tear %#lx dear %#lx\n", __func__,
- aux->head, aux->alert_mark, aux->empty_mark,
- head / CPUM_SF_SDB_PER_TABLE,
- cpuhw->lsctl.tear, cpuhw->lsctl.dear);
+ cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long);
+ cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]);
return 0;
}
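
The sampling hardware dereferences the table-entry address register (TEAR) and the data-entry address register (DEAR) directly, which is why both are now loaded with physical addresses via virt_to_phys() above; the index arithmetic itself is unchanged. A hedged sketch of that arithmetic with made-up numbers (511 SDB pointers per SDBT is an assumption, corresponding to a 4 KB table whose last slot is the link entry):

    #include <stdio.h>

    int main(void)
    {
    	unsigned long head = 1030;	/* folded ring position of the first SDB to use */
    	unsigned long per_table = 511;	/* assumed CPUM_SF_SDB_PER_TABLE */

    	/* TEAR gets phys(SDBT[head / per_table]) plus the byte offset,
    	 * DEAR gets phys(SDB[head]).
    	 */
    	printf("SDBT #%lu, entry %lu, byte offset %lu\n",
    	       head / per_table, head % per_table,
    	       (head % per_table) * sizeof(unsigned long));
    	return 0;
    }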
@@ -1475,14 +1385,15 @@ static int aux_output_begin(struct perf_output_handle *handle,
static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
unsigned long long *overflow)
{
- unsigned long long orig_overflow, orig_flags, new_flags;
+ union hws_trailer_header prev, new;
struct hws_trailer_entry *te;
te = aux_sdb_trailer(aux, alert_index);
+ prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- orig_flags = te->flags;
- *overflow = orig_overflow = te->overflow;
- if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
+ new.val = prev.val;
+ *overflow = prev.overflow;
+ if (prev.f) {
/*
* SDB is already set by hardware.
* Abort and try to set somewhere
@@ -1490,10 +1401,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
*/
return false;
}
- new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- orig_flags, orig_overflow,
- new_flags, 0ULL));
+ new.a = 1;
+ new.overflow = 0;
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
return true;
}
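
Compared with the removed cmpxchg_double() loop, try_cmpxchg128() writes the current memory contents back into prev.val whenever the exchange fails, so the header is read explicitly only once before the loop and every retry starts from the freshly observed value. A generic, hedged sketch of that pattern in user-space C (the union layout is a simplification, not the real hws_trailer_entry, and 128-bit atomics outside the kernel may need libatomic):

    #include <stdbool.h>

    /* Simplified stand-in for the 16-byte trailer header */
    union hdr {
    	unsigned __int128 val;
    	struct {
    		unsigned long long flags;
    		unsigned long long overflow;
    	};
    };

    /* Set an alert-request bit and clear the overflow counter atomically.
     * __atomic_compare_exchange_n() plays the role of try_cmpxchg128():
     * on failure it updates prev with the current contents, so the loop
     * never has to re-read the header itself.
     */
    static void set_alert(union hdr *te, unsigned long long alert_bit)
    {
    	union hdr prev, new;

    	prev.val = __atomic_load_n(&te->val, __ATOMIC_RELAXED);
    	do {
    		new.val = prev.val;
    		new.flags |= alert_bit;
    		new.overflow = 0;
    	} while (!__atomic_compare_exchange_n(&te->val, &prev.val, new.val,
    					      false, __ATOMIC_RELAXED,
    					      __ATOMIC_RELAXED));
    }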
@@ -1522,14 +1432,12 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
- unsigned long long orig_overflow, orig_flags, new_flags;
- unsigned long i, range_scan, idx, idx_old;
+ union hws_trailer_header prev, new;
+ unsigned long i, range_scan, idx;
+ unsigned long long orig_overflow;
struct hws_trailer_entry *te;
- debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
- "empty %ld\n", __func__, range, aux->head,
- aux->alert_mark, aux->empty_mark);
- if (range <= AUX_SDB_NUM_EMPTY(aux))
+ if (range <= aux_sdb_num_empty(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
* Just set alert indicator. Should check race with hardware
@@ -1550,30 +1458,27 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* Start scanning from one SDB behind empty_mark. If the new alert
 * indicator falls into this range, set it.
*/
- range_scan = range - AUX_SDB_NUM_EMPTY(aux);
- idx_old = idx = aux->empty_mark + 1;
+ range_scan = range - aux_sdb_num_empty(aux);
+ idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
+ prev.val = READ_ONCE_ALIGNED_128(te->header.val);
do {
- orig_flags = te->flags;
- orig_overflow = te->overflow;
- new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
+ new.val = prev.val;
+ orig_overflow = prev.overflow;
+ new.f = 0;
+ new.overflow = 0;
if (idx == aux->alert_mark)
- new_flags |= SDB_TE_ALERT_REQ_MASK;
+ new.a = 1;
else
- new_flags &= ~SDB_TE_ALERT_REQ_MASK;
- } while (!cmpxchg_double(&te->flags, &te->overflow,
- orig_flags, orig_overflow,
- new_flags, 0ULL));
+ new.a = 0;
+ } while (!try_cmpxchg128(&te->header.val, &prev.val, new.val));
*overflow += orig_overflow;
}
/* Update empty_mark to new position */
aux->empty_mark = aux->head + range - 1;
- debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
- "empty %ld\n", __func__, range_scan, idx_old,
- idx - 1, aux->empty_mark);
return true;
}
@@ -1590,12 +1495,12 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
unsigned long num_sdb;
aux = perf_get_aux(handle);
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Inform user space new data arrived */
- size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
- debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
+ debug_sprintf_event(sfdbg, 6, "%s #alert %ld\n", __func__,
size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
@@ -1607,12 +1512,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
pr_err("The AUX buffer with %lu pages for the "
"diagnostic-sampling mode is full\n",
num_sdb);
- debug_sprintf_event(sfdbg, 1,
- "%s: AUX buffer used up\n",
- __func__);
break;
}
- if (WARN_ON_ONCE(!aux))
+ if (!aux)
return;
/* Update head and alert_mark to new position */
@@ -1632,23 +1534,11 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
"pages to overflow\n", aux->sfb.num_sdb);
- debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
- "overflow %lld\n", __func__,
- aux->head, range, overflow);
} else {
- size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ size = aux_sdb_num_alert(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "already full, try another\n",
- __func__,
- aux->head, aux->alert_mark);
}
}
-
- if (done)
- debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
- "empty %ld\n", __func__, aux->head,
- aux->alert_mark, aux->empty_mark);
}
/*
@@ -1670,15 +1560,13 @@ static void aux_buffer_free(void *data)
kfree(aux->sdbt_index);
kfree(aux->sdb_index);
kfree(aux);
-
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
}
static void aux_sdb_init(unsigned long sdb)
{
struct hws_trailer_entry *te;
- te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
+ te = trailer_entry_ptr(sdb);
/* Save clock base */
te->clock_base = 1;
@@ -1741,7 +1629,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
/* Allocate the first SDBT */
sfb->num_sdbt = 0;
- sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (!sfb->sdbt)
goto no_sdbt;
aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
@@ -1753,23 +1641,23 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
*/
for (i = 0; i < nr_pages; i++, tail++) {
if (require_table_link(tail)) {
- new = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ new = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (!new)
goto no_sdbt;
aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
/* Link current page to tail of chain */
- *tail = (unsigned long)(void *) new + 1;
+ *tail = virt_to_phys(new) + 1;
tail = new;
}
/* Tail is the entry in a SDBT */
- *tail = (unsigned long)pages[i];
+ *tail = virt_to_phys(pages[i]);
aux->sdb_index[i] = (unsigned long)pages[i];
aux_sdb_init((unsigned long)pages[i]);
}
sfb->num_sdb = nr_pages;
/* Link the last entry in the SDBT to the first SDBT */
- *tail = (unsigned long) sfb->sdbt + 1;
+ *tail = virt_to_phys(sfb->sdbt) + 1;
sfb->tail = tail;
/*
@@ -1779,9 +1667,6 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
*/
aux->empty_mark = sfb->num_sdb - 1;
- debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
- sfb->num_sdbt, sfb->num_sdb);
-
return aux;
no_sdbt:
@@ -1802,7 +1687,7 @@ static void cpumsf_pmu_read(struct perf_event *event)
/* Nothing to do ... updates are interrupt-driven */
}
-/* Check if the new sampling period/freqeuncy is appropriate.
+/* Check if the new sampling period/frequency is appropriate.
*
* Return non-zero on error and zero on passed checks.
*/
@@ -1814,8 +1699,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
memset(&si, 0, sizeof(si));
if (event->cpu == -1) {
- if (qsi(&si))
- return -ENODEV;
+ qsi(&si);
} else {
/* Event is pinned to a particular CPU, retrieve the per-CPU
* sampling structure for accessing the CPU-specific QSI.
@@ -1825,7 +1709,7 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
si = cpuhw->qsi;
}
- do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+ do_freq = !!SAMPL_FREQ_MODE(&event->hw);
rate = getrate(do_freq, value, &si);
if (!rate)
return -EINVAL;
@@ -1833,10 +1717,6 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
event->attr.sample_period = rate;
SAMPL_RATE(&event->hw) = rate;
hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
- debug_sprintf_event(sfdbg, 4, "%s:"
- " cpu %d value %#llx period %#llx freq %d\n",
- __func__, event->cpu, value,
- event->attr.sample_period, do_freq);
return 0;
}
@@ -1847,12 +1727,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ if (!(event->hw.state & PERF_HES_STOPPED))
return;
-
- if (flags & PERF_EF_RELOAD)
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
perf_pmu_disable(event->pmu);
event->hw.state = 0;
cpuhw->lsctl.cs = 1;
@@ -1877,7 +1753,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
event->hw.state |= PERF_HES_STOPPED;
if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
- hw_perf_event_update(event, 1);
+ /* CPU hotplug off removes SDBs. No samples to extract. */
+ if (cpuhw->flags & PMU_F_RESERVED)
+ hw_perf_event_update(event, 1);
event->hw.state |= PERF_HES_UPTODATE;
}
perf_pmu_enable(event->pmu);
@@ -1887,15 +1765,14 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct aux_buffer *aux;
- int err;
+ int err = 0;
if (cpuhw->flags & PMU_F_IN_USE)
return -EAGAIN;
- if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
+ if (!SAMPL_DIAG_MODE(&event->hw) && !sf_buffer_available(cpuhw))
return -EINVAL;
- err = 0;
perf_pmu_disable(event->pmu);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -1909,9 +1786,9 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
cpuhw->lsctl.h = 1;
cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
if (!SAMPL_DIAG_MODE(&event->hw)) {
- cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
- cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
- TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt);
+ cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt;
+ TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt;
}
/* Ensure sampling functions are in the disabled state. If disabled,
@@ -2055,18 +1932,17 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Program alert request */
if (alert & CPU_MF_INT_SF_PRA) {
- if (cpuhw->flags & PMU_F_IN_USE)
+ if (cpuhw->flags & PMU_F_IN_USE) {
if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
hw_collect_aux(cpuhw);
else
hw_perf_event_update(cpuhw->event, 0);
- else
- WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
}
/* Report measurement alerts only for non-PRA codes */
if (alert != CPU_MF_INT_SF_PRA)
- debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+ debug_sprintf_event(sfdbg, 6, "%s alert %#x\n", __func__,
alert);
/* Sampling authorization change request */
@@ -2082,7 +1958,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Invalid sampling buffer entry */
if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
- pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ pr_err("A sampling buffer entry is incorrect (alert=%#x)\n",
alert);
cpuhw->flags |= PMU_F_ERR_IBE;
sf_disable();
@@ -2094,7 +1970,7 @@ static int cpusf_pmu_setup(unsigned int cpu, int flags)
/* Ignore the notification if no events are scheduled on the PMU.
* This might be racy...
*/
- if (!atomic_read(&num_events))
+ if (!refcount_read(&num_events))
return 0;
local_irq_disable();
@@ -2156,10 +2032,12 @@ static const struct kernel_param_ops param_ops_sfb_size = {
.get = param_get_sfb_size,
};
-#define RS_INIT_FAILURE_QSI 0x0001
-#define RS_INIT_FAILURE_BSDES 0x0002
-#define RS_INIT_FAILURE_ALRT 0x0003
-#define RS_INIT_FAILURE_PERF 0x0004
+enum {
+ RS_INIT_FAILURE_BSDES = 2, /* Bad basic sampling size */
+ RS_INIT_FAILURE_ALRT = 3, /* IRQ registration failure */
+ RS_INIT_FAILURE_PERF = 4 /* PMU registration failure */
+};
+
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
@@ -2175,11 +2053,7 @@ static int __init init_cpum_sampling_pmu(void)
return -ENODEV;
memset(&si, 0, sizeof(si));
- if (qsi(&si)) {
- pr_cpumsf_err(RS_INIT_FAILURE_QSI);
- return -ENODEV;
- }
-
+ qsi(&si);
if (!si.as && !si.ad)
return -ENODEV;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index c27321cb0969..2b9611c4718e 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -15,7 +15,10 @@
#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <linux/sysfs.h>
+#include <asm/stacktrace.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
@@ -54,7 +57,7 @@ static unsigned long instruction_pointer_guest(struct pt_regs *regs)
return sie_block(regs)->gpsw.addr;
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
return is_in_guest(regs) ? instruction_pointer_guest(regs)
: instruction_pointer(regs);
@@ -81,7 +84,7 @@ static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
return flags;
}
-unsigned long perf_misc_flags(struct pt_regs *regs)
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
/* Check if the cpum_sf PMU has created the pt_regs structure.
* In this case, perf misc flags can be easily extracted. Otherwise,
@@ -212,6 +215,12 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
}
}
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ arch_stack_walk_user_common(NULL, NULL, entry, regs, true);
+}
+
/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
@@ -219,5 +228,5 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
- return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
+ return sysfs_emit(page, "event=0x%04llx\n", pmu_attr->id);
}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index b38b4ae01589..63875270941b 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -16,8 +16,7 @@
#include <linux/export.h>
#include <linux/io.h>
#include <linux/perf_event.h>
-
-#include <asm/ctl_reg.h>
+#include <asm/ctlreg.h>
#include <asm/pai.h>
#include <asm/debug.h>
@@ -35,13 +34,49 @@ struct pai_userdata {
struct paicrypt_map {
unsigned long *page; /* Page for CPU to store counters */
	struct pai_userdata *save; /* Page to store non-zero counters */
- unsigned int users; /* # of PAI crypto users */
- unsigned int sampler; /* # of PAI crypto samplers */
- unsigned int counter; /* # of PAI crypto counters */
+ unsigned int active_events; /* # of PAI crypto users */
+ refcount_t refcnt; /* Reference count mapped buffers */
struct perf_event *event; /* Perf event for sampling */
+ struct list_head syswide_list; /* List system-wide sampling events */
+};
+
+struct paicrypt_mapptr {
+ struct paicrypt_map *mapptr;
};
-static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map);
+static struct paicrypt_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct paicrypt_mapptr __percpu *mapptr;
+} paicrypt_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paicrypt_root_free(void)
+{
+ if (refcount_dec_and_test(&paicrypt_root.refcnt)) {
+ free_percpu(paicrypt_root.mapptr);
+ paicrypt_root.mapptr = NULL;
+ }
+ debug_sprintf_event(cfm_dbg, 5, "%s root.refcount %d\n", __func__,
+ refcount_read(&paicrypt_root.refcnt));
+}
+
+/*
+ * On initialization of the first event, also allocate the per CPU data
+ * dynamically. Start with an array of pointers; the array size is the
+ * maximum number of CPUs possible, which might be larger than the number
+ * of CPUs currently online.
+ */
+static int paicrypt_root_alloc(void)
+{
+ if (!refcount_inc_not_zero(&paicrypt_root.refcnt)) {
+ /* The memory is already zeroed. */
+ paicrypt_root.mapptr = alloc_percpu(struct paicrypt_mapptr);
+ if (!paicrypt_root.mapptr)
+ return -ENOMEM;
+ refcount_set(&paicrypt_root.refcnt, 1);
+ }
+ return 0;
+}
/* Release the PMU if event is the last perf event */
static DEFINE_MUTEX(pai_reserve_mutex);
@@ -49,38 +84,51 @@ static DEFINE_MUTEX(pai_reserve_mutex);
/* Adjust usage counters and remove allocated memory when all users are
* gone.
*/
-static void paicrypt_event_destroy(struct perf_event *event)
+static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu)
{
- struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
+ struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
+ struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
- static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
- if (event->attr.sample_period)
- cpump->sampler -= 1;
- else
- cpump->counter -= 1;
- debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d"
- " sampler %d counter %d\n", __func__,
- event->attr.config, event->cpu, cpump->sampler,
- cpump->counter);
- if (!cpump->counter && !cpump->sampler) {
+ debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d "
+ "refcnt %u\n", __func__, event->attr.config,
+ event->cpu, cpump->active_events,
+ refcount_read(&cpump->refcnt));
+ if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
cpump->save);
free_page((unsigned long)cpump->page);
- cpump->page = NULL;
kvfree(cpump->save);
- cpump->save = NULL;
+ kfree(cpump);
+ mp->mapptr = NULL;
}
+ paicrypt_root_free();
mutex_unlock(&pai_reserve_mutex);
}
-static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ static_branch_dec(&pai_key);
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+
+ for_each_cpu(cpu, mask)
+ paicrypt_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ paicrypt_event_destroy_cpu(event, event->cpu);
+ }
+}
+
+static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
{
if (kernel)
nr += PAI_CRYPTO_MAXCTR;
- return cpump->page[nr];
+ return page[nr];
}
/* Read the counter values. Return value from location in CMP. For event
@@ -88,18 +136,19 @@ static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
*/
static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
u64 sum = 0;
int i;
if (event->attr.config != PAI_CRYPTO_BASE) {
- return paicrypt_getctr(cpump,
+ return paicrypt_getctr(cpump->page,
event->attr.config - PAI_CRYPTO_BASE,
kernel);
}
for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = paicrypt_getctr(cpump, i, kernel);
+ u64 val = paicrypt_getctr(cpump->page, i, kernel);
if (!val)
continue;
@@ -120,66 +169,110 @@ static u64 paicrypt_getall(struct perf_event *event)
return sum;
}
-/* Used to avoid races in checking concurrent access of counting and
- * sampling for crypto events
- *
- * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
+/* Check concurrent access of counting and sampling for crypto events.
 * This function is called in process context and it is safe to block.
 * When the event initialization function fails, no other callback will
* be invoked.
*
* Allocate the memory for the event.
*/
-static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
+static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu)
{
- unsigned int *use_ptr;
- int rc = 0;
+ struct paicrypt_map *cpump = NULL;
+ struct paicrypt_mapptr *mp;
+ int rc;
mutex_lock(&pai_reserve_mutex);
- if (a->sample_period) { /* Sampling requested */
- use_ptr = &cpump->sampler;
- if (cpump->counter || cpump->sampler)
- rc = -EBUSY; /* ... sampling/counting active */
- } else { /* Counting requested */
- use_ptr = &cpump->counter;
- if (cpump->sampler)
- rc = -EBUSY; /* ... and sampling active */
- }
+
+ /* Allocate root node */
+ rc = paicrypt_root_alloc();
if (rc)
goto unlock;
+ /* Allocate node for this event */
+ mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paicrypt_map allocated? */
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump) {
+ rc = -ENOMEM;
+ goto free_root;
+ }
+ INIT_LIST_HEAD(&cpump->syswide_list);
+ }
+
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
*/
- if (cpump->page)
+ if (cpump->page) {
+ refcount_inc(&cpump->refcnt);
goto unlock;
+ }
rc = -ENOMEM;
cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (!cpump->page)
- goto unlock;
+ goto free_paicrypt_map;
cpump->save = kvmalloc_array(paicrypt_cnt + 1,
sizeof(struct pai_userdata), GFP_KERNEL);
if (!cpump->save) {
free_page((unsigned long)cpump->page);
cpump->page = NULL;
- goto unlock;
+ goto free_paicrypt_map;
}
- rc = 0;
-unlock:
- /* If rc is non-zero, do not increment counter/sampler. */
- if (!rc)
- *use_ptr += 1;
- debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx sampler %d"
- " counter %d page %#lx save %p rc %d\n", __func__,
- a->sample_period, cpump->sampler, cpump->counter,
+ /* Set mode and reference count */
+ rc = 0;
+ refcount_set(&cpump->refcnt, 1);
+ mp->mapptr = cpump;
+ debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx "
+ "save %p rc %d\n", __func__, cpump->active_events,
+ refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
+ goto unlock;
+
+free_paicrypt_map:
+ /* Undo memory allocation */
+ kfree(cpump);
+ mp->mapptr = NULL;
+free_root:
+ paicrypt_root_free();
+unlock:
mutex_unlock(&pai_reserve_mutex);
+ return rc ? ERR_PTR(rc) : cpump;
+}
+
+static int paicrypt_event_init_all(struct perf_event *event)
+{
+ struct paicrypt_map *cpump;
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ cpump = paicrypt_busy(event, cpu);
+ if (IS_ERR(cpump)) {
+ for_each_cpu(cpu, maskptr)
+ paicrypt_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ rc = PTR_ERR(cpump);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+	 * On error all cpumasks are freed and all events have been destroyed.
+	 * Save which CPUs data structures have been allocated for and
+	 * release them in the paicrypt_event_destroy() callback function
+	 * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
return rc;
}
@@ -188,7 +281,7 @@ static int paicrypt_event_init(struct perf_event *event)
{
struct perf_event_attr *a = &event->attr;
struct paicrypt_map *cpump;
- int rc;
+ int rc = 0;
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
@@ -197,25 +290,29 @@ static int paicrypt_event_init(struct perf_event *event)
if (a->config < PAI_CRYPTO_BASE ||
a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
return -EINVAL;
- /* Allow only CPU wide operation, no process context for now. */
- if (event->hw.target || event->cpu == -1)
- return -ENOENT;
- /* Allow only CRYPTO_ALL for sampling. */
+ /* Allow only CRYPTO_ALL for sampling */
if (a->sample_period && a->config != PAI_CRYPTO_BASE)
return -EINVAL;
+ /* Get a page to store last counter values for sampling */
+ if (a->sample_period) {
+ PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+ if (!PAI_SAVE_AREA(event)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ }
- cpump = per_cpu_ptr(&paicrypt_map, event->cpu);
- rc = paicrypt_busy(a, cpump);
- if (rc)
- return rc;
-
- /* Event initialization sets last_tag to 0. When later on the events
- * are deleted and re-added, do not reset the event count value to zero.
- * Events are added, deleted and re-added when 2 or more events
- * are active at the same time.
- */
- event->hw.last_tag = 0;
- cpump->event = event;
+ if (event->cpu >= 0) {
+ cpump = paicrypt_busy(event, event->cpu);
+ if (IS_ERR(cpump))
+ rc = PTR_ERR(cpump);
+ } else {
+ rc = paicrypt_event_init_all(event);
+ }
+ if (rc) {
+ free_page(PAI_SAVE_AREA(event));
+ goto out;
+ }
event->destroy = paicrypt_event_destroy;
if (a->sample_period) {
@@ -230,7 +327,8 @@ static int paicrypt_event_init(struct perf_event *event)
}
static_branch_inc(&pai_key);
- return 0;
+out:
+ return rc;
}
static void paicrypt_read(struct perf_event *event)
@@ -247,76 +345,102 @@ static void paicrypt_read(struct perf_event *event)
static void paicrypt_start(struct perf_event *event, int flags)
{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
u64 sum;
- if (!event->hw.last_tag) {
- event->hw.last_tag = 1;
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->count, 0);
+ if (!event->attr.sample_period) { /* Counting */
+ sum = paicrypt_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
+ } else { /* Sampling */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
}
}
static int paicrypt_add(struct perf_event *event, int flags)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
unsigned long ccd;
- if (cpump->users++ == 0) {
+ if (++cpump->active_events == 1) {
ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
- WRITE_ONCE(S390_lowcore.ccd, ccd);
- __ctl_set_bit(0, 50);
+ WRITE_ONCE(get_lowcore()->ccd, ccd);
+ local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
- cpump->event = event;
- if (flags & PERF_EF_START && !event->attr.sample_period) {
- /* Only counting needs initial counter value */
+ if (flags & PERF_EF_START)
paicrypt_start(event, PERF_EF_RELOAD);
- }
event->hw.state = 0;
- if (event->attr.sample_period)
- perf_sched_cb_inc(event->pmu);
return 0;
}
+static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *);
static void paicrypt_stop(struct perf_event *event, int flags)
{
- paicrypt_read(event);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
+ paicrypt_read(event);
+ } else { /* Sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ perf_sched_cb_dec(event->pmu);
+ list_del(PAI_SWLIST(event));
+ } else {
+ paicrypt_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
+ }
event->hw.state = PERF_HES_STOPPED;
}
static void paicrypt_del(struct perf_event *event, int flags)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
-
- if (event->attr.sample_period)
- perf_sched_cb_dec(event->pmu);
- if (!event->attr.sample_period)
- /* Only counting needs to read counter */
- paicrypt_stop(event, PERF_EF_UPDATE);
- if (cpump->users-- == 1) {
- __ctl_clear_bit(0, 50);
- WRITE_ONCE(S390_lowcore.ccd, 0);
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ paicrypt_stop(event, PERF_EF_UPDATE);
+ if (--cpump->active_events == 0) {
+ local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
+ WRITE_ONCE(get_lowcore()->ccd, 0);
}
}
-/* Create raw data and save it in buffer. Returns number of bytes copied.
- * Saves only positive counter entries of the form
+/* Create raw data and save it in buffer. Calculate the delta for each
+ * counter between this invocation and the last invocation.
+ * Returns number of bytes copied.
+ * Saves only entries with positive counter difference of the form
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
-static size_t paicrypt_copy(struct pai_userdata *userdata,
- struct paicrypt_map *cpump,
- bool exclude_user, bool exclude_kernel)
+static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
+ unsigned long *page_old, bool exclude_user,
+ bool exclude_kernel)
{
int i, outidx = 0;
for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = 0;
+ u64 val = 0, val_old = 0;
- if (!exclude_kernel)
- val += paicrypt_getctr(cpump, i, true);
- if (!exclude_user)
- val += paicrypt_getctr(cpump, i, false);
+ if (!exclude_kernel) {
+ val += paicrypt_getctr(page, i, true);
+ val_old += paicrypt_getctr(page_old, i, true);
+ }
+ if (!exclude_user) {
+ val += paicrypt_getctr(page, i, false);
+ val_old += paicrypt_getctr(page_old, i, false);
+ }
+ if (val >= val_old)
+ val -= val_old;
+ else
+ val = (~0ULL - val_old) + val + 1;
if (val) {
userdata[outidx].num = i;
userdata[outidx].value = val;
@@ -326,24 +450,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
return outidx * sizeof(struct pai_userdata);
}
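
paicrypt_copy() now reports, per counter, the difference between the current lowcore page and the snapshot saved at the previous sample; the else branch covers a counter that wrapped around its 64-bit range between the two reads. A small worked example of the wraparound case, scaled down to 8-bit values for readability (the numbers are illustrative only):

    #include <assert.h>

    int main(void)
    {
    	/* 8-bit stand-in for the 64-bit counters: snapshot 250, current
    	 * value 4, i.e. the counter wrapped once in between.
    	 */
    	unsigned char val_old = 250, val = 4, delta;

    	if (val >= val_old)
    		delta = val - val_old;
    	else
    		delta = (0xff - val_old) + val + 1;	/* (255 - 250) + 4 + 1 */

    	assert(delta == 10);	/* ten increments: 251..255, then 0..4 */
    	return 0;
    }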
-static int paicrypt_push_sample(void)
+static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
+ struct perf_event *event)
{
- struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map);
- struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
- size_t rawsize;
int overflow;
- if (!cpump->event) /* No event active */
- return 0;
- rawsize = paicrypt_copy(cpump->save, cpump,
- cpump->event->attr.exclude_user,
- cpump->event->attr.exclude_kernel);
- if (!rawsize) /* No incremented counters */
- return 0;
-
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@@ -364,27 +478,54 @@ static int paicrypt_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
- raw.size = raw.frag.size;
- data.raw = &raw;
+ perf_sample_save_raw_data(&data, event, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);
perf_event_update_userpage(event);
- /* Clear lowcore page after read */
- memset(cpump->page, 0, PAGE_SIZE);
+ /* Save crypto counter lowcore page after reading event data. */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
return overflow;
}
+/* Check if there is data to be saved on schedule out of a task. */
+static void paicrypt_have_sample(struct perf_event *event,
+ struct paicrypt_map *cpump)
+{
+ size_t rawsize;
+
+ if (!event) /* No event active */
+ return;
+ rawsize = paicrypt_copy(cpump->save, cpump->page,
+ (unsigned long *)PAI_SAVE_AREA(event),
+ event->attr.exclude_user,
+ event->attr.exclude_kernel);
+	if (rawsize)			/* Incremented counters */
+ paicrypt_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paicrypt_have_samples(void)
+{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ paicrypt_have_sample(event, cpump);
+}
+
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
-static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
/* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
+ * results on schedule_out and if page was dirty, save old values.
*/
if (!sched_in)
- paicrypt_push_sample();
+ paicrypt_have_samples();
}
/* Attribute definitions for paicrypt interface. As with other CPU
@@ -428,7 +569,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = {
/* Performance monitoring unit for mapped counters */
static struct pmu paicrypt = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = paicrypt_event_init,
.add = paicrypt_add,
.del = paicrypt_del,
@@ -598,6 +739,22 @@ static const char * const paicrypt_ctrnames[] = {
[154] = "PCKMO_ENCRYPT_ECC_ED448_KEY",
[155] = "IBM_RESERVED_155",
[156] = "IBM_RESERVED_156",
+ [157] = "KM_FULL_XTS_AES_128",
+ [158] = "KM_FULL_XTS_AES_256",
+ [159] = "KM_FULL_XTS_ENCRYPTED_AES_128",
+ [160] = "KM_FULL_XTS_ENCRYPTED_AES_256",
+ [161] = "KMAC_HMAC_SHA_224",
+ [162] = "KMAC_HMAC_SHA_256",
+ [163] = "KMAC_HMAC_SHA_384",
+ [164] = "KMAC_HMAC_SHA_512",
+ [165] = "KMAC_HMAC_ENCRYPTED_SHA_224",
+ [166] = "KMAC_HMAC_ENCRYPTED_SHA_256",
+ [167] = "KMAC_HMAC_ENCRYPTED_SHA_384",
+ [168] = "KMAC_HMAC_ENCRYPTED_SHA_512",
+ [169] = "PCKMO_ENCRYPT_HMAC_512_KEY",
+ [170] = "PCKMO_ENCRYPT_HMAC_1024_KEY",
+ [171] = "PCKMO_ENCRYPT_AES_XTS_128",
+ [172] = "PCKMO_ENCRYPT_AES_XTS_256",
};
static void __init attr_event_free(struct attribute **attrs, int num)
@@ -619,6 +776,12 @@ static int __init attr_event_init_one(struct attribute **attrs, int num)
{
struct perf_pmu_events_attr *pa;
+ /* Index larger than array_size, no counter name available */
+ if (num >= ARRAY_SIZE(paicrypt_ctrnames)) {
+ attrs[num] = NULL;
+ return 0;
+ }
+
pa = kzalloc(sizeof(*pa), GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -639,14 +802,13 @@ static int __init attr_event_init(void)
struct attribute **attrs;
int ret, i;
- attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs),
- GFP_KERNEL);
+ attrs = kmalloc_array(paicrypt_cnt + 2, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
- for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) {
+ for (i = 0; i <= paicrypt_cnt; i++) {
ret = attr_event_init_one(attrs, i);
if (ret) {
- attr_event_free(attrs, i - 1);
+ attr_event_free(attrs, i);
return ret;
}
}
@@ -667,8 +829,10 @@ static int __init paicrypt_init(void)
paicrypt_cnt = ib.num_cc;
if (paicrypt_cnt == 0)
return 0;
- if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR)
- paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1;
+ if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) {
+ pr_err("Too many PMU pai_crypto counters %d\n", paicrypt_cnt);
+ return -E2BIG;
+ }
rc = attr_event_init(); /* Export known PAI crypto events */
if (rc) {
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 000000000000..fd14d5ebccbc
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT "pai_ext"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/perf_event.h>
+#include <asm/ctlreg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */
+#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt; /* Extracted with QPACI instruction */
+
+struct pai_userdata {
+ u16 num;
+ u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension control block 1 is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb { /* PAI extension 1 control block */
+ u64 header; /* Not used */
+ u64 reserved1;
+ u64 acc; /* Addr to analytics counter control block */
+ u8 reserved2[488];
+} __packed;
+
+struct paiext_map {
+ unsigned long *area; /* Area for CPU to store counters */
+ struct pai_userdata *save; /* Area to store non-zero counters */
+ unsigned int active_events; /* # of PAI Extension users */
+ refcount_t refcnt;
+ struct perf_event *event; /* Perf event for sampling */
+ struct paiext_cb *paiext_cb; /* PAI extension control block area */
+ struct list_head syswide_list; /* List system-wide sampling events */
+};
+
+struct paiext_mapptr {
+ struct paiext_map *mapptr;
+};
+
+static struct paiext_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct paiext_mapptr __percpu *mapptr;
+} paiext_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paiext_root_free(void)
+{
+ if (refcount_dec_and_test(&paiext_root.refcnt)) {
+ free_percpu(paiext_root.mapptr);
+ paiext_root.mapptr = NULL;
+ }
+ debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__,
+ refcount_read(&paiext_root.refcnt));
+}
+
+/* On initialization of the first event, also allocate the per CPU data
+ * dynamically. Start with an array of pointers; the array size is the
+ * maximum number of CPUs possible, which might be larger than the number
+ * of CPUs currently online.
+ */
+static int paiext_root_alloc(void)
+{
+ if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
+ /* The memory is already zeroed. */
+ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+ if (!paiext_root.mapptr) {
+ /* Returning without refcnt adjustment is ok. The
+ * error code is handled by paiext_alloc() which
+ * decrements refcnt when an event can not be
+ * created.
+ */
+ return -ENOMEM;
+ }
+ refcount_set(&paiext_root.refcnt, 1);
+ }
+ return 0;
+}
+
+/* Protects against concurrent increments of the sampler and counter
+ * members and prohibits concurrent execution of
+ * counting and sampling events.
+ * Ensures that analytics counter block is deallocated only when the
+ * sampling and counting on that cpu is zero.
+ * For details see paiext_alloc().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+ kfree(mp->mapptr->area);
+ kfree(mp->mapptr->paiext_cb);
+ kvfree(mp->mapptr->save);
+ kfree(mp->mapptr);
+ mp->mapptr = NULL;
+}
+
+/* Release the PMU if event is the last perf event */
+static void paiext_event_destroy_cpu(struct perf_event *event, int cpu)
+{
+ struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu);
+ struct paiext_map *cpump = mp->mapptr;
+
+ mutex_lock(&paiext_reserve_mutex);
+ if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
+ paiext_free(mp);
+ paiext_root_free();
+ mutex_unlock(&paiext_reserve_mutex);
+}
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+
+ for_each_cpu(cpu, mask)
+ paiext_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ paiext_event_destroy_cpu(event, event->cpu);
+ }
+ debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__,
+ event->cpu);
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paiext_alloc_cpu(struct perf_event *event, int cpu)
+{
+ struct paiext_mapptr *mp;
+ struct paiext_map *cpump;
+ int rc;
+
+ mutex_lock(&paiext_reserve_mutex);
+ rc = paiext_root_alloc();
+ if (rc)
+ goto unlock;
+
+ mp = per_cpu_ptr(paiext_root.mapptr, cpu);
+ cpump = mp->mapptr;
+ if (!cpump) { /* Paiext_map allocated? */
+ rc = -ENOMEM;
+ cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+ if (!cpump)
+ goto undo;
+
+ /* Allocate memory for counter area and counter extraction.
+ * These are
+		 * - a 512 byte block that requires 512 byte boundary alignment.
+		 * - a 1 KB block that requires 1 KB boundary alignment.
+ * Only the first counting event has to allocate the area.
+ *
+ * Note: This works with commit 59bb47985c1d by default.
+ * Backporting this to kernels without this commit might
+ * need adjustment.
+ */
+ mp->mapptr = cpump;
+ cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+ cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+ cpump->save = kvmalloc_array(paiext_cnt + 1,
+ sizeof(struct pai_userdata),
+ GFP_KERNEL);
+ if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+ paiext_free(mp);
+ goto undo;
+ }
+ INIT_LIST_HEAD(&cpump->syswide_list);
+ refcount_set(&cpump->refcnt, 1);
+ rc = 0;
+ } else {
+ refcount_inc(&cpump->refcnt);
+ }
+
+undo:
+ if (rc) {
+ /* Error in allocation of event, decrement anchor. Since
+		 * the event is not created, its destroy() function is never
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ paiext_root_free();
+ }
+unlock:
+ mutex_unlock(&paiext_reserve_mutex);
+ /* If rc is non-zero, no increment of counter/sampler was done. */
+ return rc;
+}
+
+static int paiext_alloc(struct perf_event *event)
+{
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ rc = paiext_alloc_cpu(event, cpu);
+ if (rc) {
+ for_each_cpu(cpu, maskptr)
+ paiext_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+	 * On error all cpumasks are freed and all events have been destroyed.
+	 * Save which CPUs data structures have been allocated for and
+	 * release them in the paiext_event_destroy() callback function
+	 * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
+/* The PAI extension 1 control block supports up to 128 entries. Return
+ * the index within PAIE1_CB given the event number. Also validate event
+ * number.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+
+ if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
+ /* Offset NNPA in paiext_cb */
+ event->hw.config_base = offsetof(struct paiext_cb, acc);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+/* Might be called on different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *a = &event->attr;
+ int rc;
+
+ /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+ if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+ return -ENOENT;
+ /* PAI extension event must be valid and in supported range */
+ rc = paiext_event_valid(event);
+ if (rc)
+ return rc;
+ /* Allow only event NNPA_ALL for sampling. */
+ if (a->sample_period && a->config != PAI_NNPA_BASE)
+ return -EINVAL;
+ /* Prohibit exclude_user event selection */
+ if (a->exclude_user)
+ return -EINVAL;
+ /* Get a page to store last counter values for sampling */
+ if (a->sample_period) {
+ PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
+ if (!PAI_SAVE_AREA(event))
+ return -ENOMEM;
+ }
+
+ if (event->cpu >= 0)
+ rc = paiext_alloc_cpu(event, event->cpu);
+ else
+ rc = paiext_alloc(event);
+ if (rc) {
+ free_page(PAI_SAVE_AREA(event));
+ return rc;
+ }
+ event->destroy = paiext_event_destroy;
+
+ if (a->sample_period) {
+ a->sample_period = 1;
+ a->freq = 0;
+		/* Register for paiext_sched_task() to be called */
+ event->attach_state |= PERF_ATTACH_SCHED_CB;
+ /* Add raw data which are the memory mapped counters */
+ a->sample_type |= PERF_SAMPLE_RAW;
+ /* Turn off inheritance */
+ a->inherit = 0;
+ }
+
+ return 0;
+}
+
+static u64 paiext_getctr(unsigned long *area, int nr)
+{
+ return area[nr];
+}
+
+/* Read the counter values. Return value from location in buffer. For event
+ * NNPA_ALL sum up all events.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ u64 sum = 0;
+ int i;
+
+ if (event->attr.config != PAI_NNPA_BASE)
+ return paiext_getctr(cpump->area,
+ event->attr.config - PAI_NNPA_BASE);
+
+ for (i = 1; i <= paiext_cnt; i++)
+ sum += paiext_getctr(cpump->area, i);
+
+ return sum;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+ return paiext_getdata(event);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+ u64 prev, new, delta;
+
+ prev = local64_read(&event->hw.prev_count);
+ new = paiext_getall(event);
+ local64_set(&event->hw.prev_count, new);
+ delta = new - prev;
+ local64_add(delta, &event->count);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ u64 sum;
+
+ if (!event->attr.sample_period) { /* Counting */
+ sum = paiext_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ } else { /* Sampling */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+ PAIE1_CTRBLOCK_SZ);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
+ }
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ if (++cpump->active_events == 1) {
+ get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb);
+ pcb->acc = virt_to_phys(cpump->area) | 0x1;
+ /* Enable CPU instruction lookup for PAIE1 control block */
+ local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
+ }
+ if (flags & PERF_EF_START)
+ paiext_start(event, PERF_EF_RELOAD);
+ event->hw.state = 0;
+ return 0;
+}
+
+static void paiext_have_sample(struct perf_event *, struct paiext_map *);
+static void paiext_stop(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
+ paiext_read(event);
+ } else { /* Sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_del(PAI_SWLIST(event));
+ perf_sched_cb_dec(event->pmu);
+ } else {
+ paiext_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
+ }
+ event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct paiext_cb *pcb = cpump->paiext_cb;
+
+ paiext_stop(event, PERF_EF_UPDATE);
+ if (--cpump->active_events == 0) {
+ /* Disable CPU instruction lookup for PAIE1 control block */
+ local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
+ pcb->acc = 0;
+ get_lowcore()->aicd = 0;
+ }
+}
+
+/* Create raw data and save it in buffer. Returns number of bytes copied.
+ * Saves only positive counter entries of the form
+ * 2 bytes: Number of counter
+ * 8 bytes: Value of counter
+ */
+static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area,
+ unsigned long *area_old)
+{
+ int i, outidx = 0;
+
+ for (i = 1; i <= paiext_cnt; i++) {
+ u64 val = paiext_getctr(area, i);
+ u64 val_old = paiext_getctr(area_old, i);
+
+ if (val >= val_old)
+ val -= val_old;
+ else
+ val = (~0ULL - val_old) + val + 1;
+ if (val) {
+ userdata[outidx].num = i;
+ userdata[outidx].value = val;
+ outidx++;
+ }
+ }
+ return outidx * sizeof(*userdata);
+}
+
+/* Write sample when one or more counter values are nonzero.
+ *
+ * Note: The functions paiext_sched_task() and paiext_push_sample() are not
+ * invoked after function paiext_del() has been called because of function
+ * perf_sched_cb_dec().
+ * The functions paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active. Function perf_sched_cb_inc()
+ * has been invoked to install function paiext_sched_task() as callback
+ * to run at context switch time (see paiext_add()).
+ *
+ * This causes function perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed a
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
+ struct perf_event *event)
+{
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ int overflow;
+
+ /* Setup perf sample */
+ memset(&regs, 0, sizeof(regs));
+ memset(&raw, 0, sizeof(raw));
+ memset(&data, 0, sizeof(data));
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ if (event->attr.sample_type & PERF_SAMPLE_TID) {
+ data.tid_entry.pid = task_tgid_nr(current);
+ data.tid_entry.tid = task_pid_nr(current);
+ }
+ if (event->attr.sample_type & PERF_SAMPLE_TIME)
+ data.time = event->clock();
+ if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+ data.id = event->id;
+ if (event->attr.sample_type & PERF_SAMPLE_CPU)
+ data.cpu_entry.cpu = smp_processor_id();
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.frag.size = rawsize;
+ raw.frag.data = cpump->save;
+ perf_sample_save_raw_data(&data, event, &raw);
+ }
+
+ overflow = perf_event_overflow(event, &data, &regs);
+ perf_event_update_userpage(event);
+	/* Save NNPA lowcore area after reading event data. */
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+ PAIE1_CTRBLOCK_SZ);
+ return overflow;
+}
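
The raw payload attached to each sample above is simply an array of packed (counter number, delta) pairs as laid out by struct pai_userdata at the top of this file, so a consumer can walk it directly. A hedged user-space sketch of decoding one payload (the perf ring-buffer parsing that delivers the raw pointer and size is omitted):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Mirrors struct pai_userdata: 2-byte counter number, 8-byte delta, packed */
    struct pai_userdata {
    	uint16_t num;
    	uint64_t value;
    } __attribute__((packed));

    static void decode_pai_raw(const void *raw, size_t size)
    {
    	const struct pai_userdata *entry = raw;
    	size_t i, n = size / sizeof(*entry);

    	for (i = 0; i < n; i++)
    		printf("counter %u increased by %llu\n", entry[i].num,
    		       (unsigned long long)entry[i].value);
    }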
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paiext_have_sample(struct perf_event *event,
+ struct paiext_map *cpump)
+{
+ size_t rawsize;
+
+ if (!event)
+ return;
+ rawsize = paiext_copy(cpump->save, cpump->area,
+ (unsigned long *)PAI_SAVE_AREA(event));
+ if (rawsize) /* Incremented counters */
+ paiext_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paiext_have_samples(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ paiext_have_sample(event, cpump);
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+ /* We started with a clean page on event installation. So read out
+ * results on schedule_out and if page was dirty, save old values.
+ */
+ if (!sched_in)
+ paiext_have_samples();
+}
+
+/* Attribute definitions for pai extension1 interface. As with other CPU
+ * Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported; there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paiext_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+ .name = "events",
+ .attrs = NULL, /* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+ .name = "format",
+ .attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+ &paiext_events_group,
+ &paiext_format_group,
+ NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+ .task_ctx_nr = perf_hw_context,
+ .event_init = paiext_event_init,
+ .add = paiext_add,
+ .del = paiext_del,
+ .start = paiext_start,
+ .stop = paiext_stop,
+ .read = paiext_read,
+ .sched_task = paiext_sched_task,
+ .attr_groups = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+ [0] = "NNPA_ALL",
+ [1] = "NNPA_ADD",
+ [2] = "NNPA_SUB",
+ [3] = "NNPA_MUL",
+ [4] = "NNPA_DIV",
+ [5] = "NNPA_MIN",
+ [6] = "NNPA_MAX",
+ [7] = "NNPA_LOG",
+ [8] = "NNPA_EXP",
+ [9] = "NNPA_IBM_RESERVED_9",
+ [10] = "NNPA_RELU",
+ [11] = "NNPA_TANH",
+ [12] = "NNPA_SIGMOID",
+ [13] = "NNPA_SOFTMAX",
+ [14] = "NNPA_BATCHNORM",
+ [15] = "NNPA_MAXPOOL2D",
+ [16] = "NNPA_AVGPOOL2D",
+ [17] = "NNPA_LSTMACT",
+ [18] = "NNPA_GRUACT",
+ [19] = "NNPA_CONVOLUTION",
+ [20] = "NNPA_MATMUL_OP",
+ [21] = "NNPA_MATMUL_OP_BCAST23",
+ [22] = "NNPA_SMALLBATCH",
+ [23] = "NNPA_LARGEDIM",
+ [24] = "NNPA_SMALLTENSOR",
+ [25] = "NNPA_1MFRAME",
+ [26] = "NNPA_2GFRAME",
+ [27] = "NNPA_ACCESSEXCEPT",
+ [28] = "NNPA_TRANSFORM",
+ [29] = "NNPA_GELU",
+ [30] = "NNPA_MOMENTS",
+ [31] = "NNPA_LAYERNORM",
+ [32] = "NNPA_MATMUL_OP_BCAST1",
+ [33] = "NNPA_SQRT",
+ [34] = "NNPA_INVSQRT",
+ [35] = "NNPA_NORM",
+ [36] = "NNPA_REDUCE",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+ struct device_attribute *dap;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ dap = container_of(attrs[i], struct device_attribute, attr);
+ pa = container_of(dap, struct perf_pmu_events_attr, attr);
+ kfree(pa);
+ }
+ kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+ struct perf_pmu_events_attr *pa;
+
+ /* Index larger than array_size, no counter name available */
+ if (num >= ARRAY_SIZE(paiext_ctrnames)) {
+ attrs[num] = NULL;
+ return 0;
+ }
+
+ pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+ if (!pa)
+ return -ENOMEM;
+
+ sysfs_attr_init(&pa->attr.attr);
+ pa->id = PAI_NNPA_BASE + num;
+ pa->attr.attr.name = paiext_ctrnames[num];
+ pa->attr.attr.mode = 0444;
+ pa->attr.show = cpumf_events_sysfs_show;
+ pa->attr.store = NULL;
+ attrs[num] = &pa->attr.attr;
+ return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+ struct attribute **attrs;
+ int ret, i;
+
+ attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+ for (i = 0; i <= paiext_cnt; i++) {
+ ret = attr_event_init_one(attrs, i);
+ if (ret) {
+ attr_event_free(attrs, i);
+ return ret;
+ }
+ }
+ attrs[i] = NULL;
+ paiext_events_group.attrs = attrs;
+ return 0;
+}
+
+static int __init paiext_init(void)
+{
+ struct qpaci_info_block ib;
+ int rc = -ENOMEM;
+
+ if (!test_facility(197))
+ return 0;
+
+ qpaci(&ib);
+ paiext_cnt = ib.num_nnpa;
+ if (paiext_cnt >= PAI_NNPA_MAXCTR)
+ paiext_cnt = PAI_NNPA_MAXCTR;
+ if (!paiext_cnt)
+ return 0;
+
+ rc = attr_event_init();
+ if (rc) {
+ pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+ return rc;
+ }
+
+ /* Setup s390dbf facility */
+ paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+ if (!paiext_dbg) {
+ pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+ rc = -ENOMEM;
+ goto out_init;
+ }
+ debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+ rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+ if (rc) {
+ pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
+ "rc=%i\n", rc);
+ goto out_pmu;
+ }
+
+ return 0;
+
+out_pmu:
+ debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+ debug_unregister(paiext_dbg);
+out_init:
+ attr_event_free(paiext_events_group.attrs,
+ ARRAY_SIZE(paiext_ctrnames) + 1);
+ return rc;
+}
+
+device_initcall(paiext_init);
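[Editor's note, not part of the patch: a minimal userspace sketch of how the counters registered by paiext_init() above can be read through the perf interface. It assumes the PMU is exposed under /sys/bus/event_source/devices/pai_ext (the KMSG_COMPONENT string is not visible in this hunk) and that event codes start at the 0x1800 base described in the comment block at the top of the file.]

/* Illustrative sketch only; PMU name and event base are assumptions. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr = { 0 };
	unsigned int type = 0;
	long long count;
	FILE *f;
	int fd;

	/* Dynamic PMUs publish their type id in sysfs. */
	f = fopen("/sys/bus/event_source/devices/pai_ext/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x1800;	/* NNPA_ALL: base 0x1800 + counter offset 0 */
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* ... run NNPA work in this task ... */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("NNPA_ALL: %lld\n", count);
	close(fd);
	return 0;
}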
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 6e9e5d5e927e..a6b058ee4a36 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -5,8 +5,7 @@
#include <linux/errno.h>
#include <linux/bug.h>
#include <asm/ptrace.h>
-#include <asm/fpu/api.h>
-#include <asm/fpu/types.h>
+#include <asm/fpu.h>
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
@@ -20,8 +19,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return 0;
idx -= PERF_REG_S390_FP0;
- fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx)
- : current->thread.fpu.fprs[idx];
+ fp = *(freg_t *)(current->thread.ufpu.vxrs + idx);
return fp.ui;
}
@@ -63,6 +61,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
*/
regs_user->regs = task_pt_regs(current);
if (user_mode(regs_user->regs))
- save_fpu_regs();
+ save_user_fpu_regs();
regs_user->abi = perf_reg_abi(current);
}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 89949b9f3cf8..9637aee43c40 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -30,16 +30,20 @@
#include <linux/export.h>
#include <linux/init_task.h>
#include <linux/entry-common.h>
+#include <linux/io.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
+#include <asm/switch_to.h>
#include <asm/cpu_mf.h>
-#include <asm/io.h>
#include <asm/processor.h>
+#include <asm/ptrace.h>
#include <asm/vtimer.h>
#include <asm/exec.h>
+#include <asm/fpu.h>
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/stacktrace.h>
-#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include <asm/unwind.h>
#include "entry.h"
@@ -67,10 +71,10 @@ void flush_thread(void)
void arch_setup_new_exec(void)
{
- if (S390_lowcore.current_pid != current->pid) {
- S390_lowcore.current_pid = current->pid;
+ if (get_lowcore()->current_pid != current->pid) {
+ get_lowcore()->current_pid = current->pid;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
}
@@ -82,15 +86,22 @@ void arch_release_task_struct(struct task_struct *tsk)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
+ save_user_fpu_regs();
+
+ *dst = *src;
+ dst->thread.kfpu_flags = 0;
+
/*
- * Save the floating-point or vector register state of the current
- * task and set the CIF_FPU flag to lazy restore the FPU register
- * state when returning to user space.
+ * Don't transfer over the runtime instrumentation or the guarded
+ * storage control block pointers. These fields are cleared here instead
+ * of in copy_thread() to avoid premature freeing of associated memory
+ * on fork() failure. Wait to clear the RI flag because ->stack still
+ * refers to the source thread.
*/
- save_fpu_regs();
+ dst->thread.ri_cb = NULL;
+ dst->thread.gs_cb = NULL;
+ dst->thread.gs_bc_cb = NULL;
- memcpy(dst, src, arch_task_struct_size);
- dst->thread.fpu.regs = dst->thread.fpu.fprs;
return 0;
}
@@ -124,21 +135,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.last_break = 1;
frame->sf.back_chain = 0;
- frame->sf.gprs[5] = (unsigned long)frame + sizeof(struct stack_frame);
- frame->sf.gprs[6] = (unsigned long)p;
+ frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs;
+ frame->sf.gprs[12 - 6] = (unsigned long)p;
/* new return point is ret_from_fork */
- frame->sf.gprs[8] = (unsigned long)ret_from_fork;
+ frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork;
/* fake return stack for resume(), don't go back to schedule */
- frame->sf.gprs[9] = (unsigned long)frame;
+ frame->sf.gprs[15 - 6] = (unsigned long)frame;
/* Store access registers to kernel stack of new process. */
if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- frame->childregs.psw.addr =
- (unsigned long)__ret_from_fork;
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+ PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.gprs[9] = (unsigned long)args->fn;
frame->childregs.gprs[10] = (unsigned long)args->fn_arg;
frame->childregs.orig_gpr2 = -1;
@@ -150,13 +159,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->childregs.flags = 0;
if (new_stackp)
frame->childregs.gprs[15] = new_stackp;
-
- /* Don't copy runtime instrumentation info */
- p->thread.ri_cb = NULL;
+ /*
+ * Clear the runtime instrumentation flag after the above childregs
+ * copy. The CB pointer was already cleared in arch_dup_task_struct().
+ */
frame->childregs.psw.mask &= ~PSW_MASK_RI;
- /* Don't copy guarded storage control block */
- p->thread.gs_cb = NULL;
- p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
@@ -178,8 +185,23 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
void execve_tail(void)
{
- current->thread.fpu.fpc = 0;
- asm volatile("sfpc %0" : : "d" (0));
+ current->thread.ufpu.fpc = 0;
+ fpu_sfpc(0);
+}
+
+struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next)
+{
+ save_user_fpu_regs();
+ save_kernel_fpu_regs(&prev->thread);
+ save_access_regs(&prev->thread.acrs[0]);
+ save_ri_cb(prev->thread.ri_cb);
+ save_gs_cb(prev->thread.gs_cb);
+ update_cr_regs(next);
+ restore_kernel_fpu_regs(&next->thread);
+ restore_access_regs(&next->thread.acrs[0]);
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);
+ restore_gs_cb(next->thread.gs_cb);
+ return __switch_to_asm(prev, next);
}
unsigned long __get_wchan(struct task_struct *p)
@@ -214,13 +236,13 @@ unsigned long __get_wchan(struct task_struct *p)
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= get_random_u32_below(PAGE_SIZE);
return sp & ~0xf;
}
static inline unsigned long brk_rnd(void)
{
- return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
+ return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index aa0e0e7fc773..11f70c1e2797 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -18,7 +18,9 @@
#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
-
+#include <linux/smp.h>
+#include <asm/text-patching.h>
+#include <asm/machine.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@@ -72,7 +74,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
this_cpu = smp_processor_id();
if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
__this_cpu_write(cpu_relax_retry, 0);
- cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+ cpu = cpumask_next_wrap(this_cpu, cpumask);
if (cpu >= nr_cpu_ids)
return;
if (arch_vcpu_is_preempted(cpu))
@@ -80,6 +82,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
}
}
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
+
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+ cpus_read_lock();
+ text_poke_sync();
+ cpus_read_unlock();
+}
+
/*
* cpu_init - initializes state that is per-CPU.
*/
@@ -96,15 +115,6 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, current);
}
-/*
- * cpu_have_feature - Test CPU features on module initialization
- */
-int cpu_have_feature(unsigned int num)
-{
- return elf_hwcap & (1UL << num);
-}
-EXPORT_SYMBOL(cpu_have_feature);
-
static void show_facilities(struct seq_file *m)
{
unsigned int bit;
@@ -201,21 +211,18 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_DFP;
/* huge page support */
- if (MACHINE_HAS_EDAT1)
+ if (cpu_has_edat1())
elf_hwcap |= HWCAP_HPAGE;
/* 64-bit register support for 31-bit processes */
elf_hwcap |= HWCAP_HIGH_GPRS;
/* transactional execution */
- if (MACHINE_HAS_TE)
+ if (machine_has_tx())
elf_hwcap |= HWCAP_TE;
- /*
- * Vector extension can be disabled with the "novx" parameter.
- * Use MACHINE_HAS_VX instead of facility bit 129.
- */
- if (MACHINE_HAS_VX) {
+ /* vector */
+ if (test_facility(129)) {
elf_hwcap |= HWCAP_VXRS;
if (test_facility(134))
elf_hwcap |= HWCAP_VXRS_BCD;
@@ -239,10 +246,10 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_NNPA;
/* guarded storage */
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
elf_hwcap |= HWCAP_GS;
- if (MACHINE_HAS_PCI_MIO)
+ if (test_machine_feature(MFEATURE_PCI_MIO))
elf_hwcap |= HWCAP_PCI_MIO;
/* virtualization support */
@@ -261,31 +268,35 @@ static int __init setup_elf_platform(void)
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
default: /* Use "z10" as default. */
- strcpy(elf_platform, "z10");
+ strscpy(elf_platform, "z10");
break;
case 0x2817:
case 0x2818:
- strcpy(elf_platform, "z196");
+ strscpy(elf_platform, "z196");
break;
case 0x2827:
case 0x2828:
- strcpy(elf_platform, "zEC12");
+ strscpy(elf_platform, "zEC12");
break;
case 0x2964:
case 0x2965:
- strcpy(elf_platform, "z13");
+ strscpy(elf_platform, "z13");
break;
case 0x3906:
case 0x3907:
- strcpy(elf_platform, "z14");
+ strscpy(elf_platform, "z14");
break;
case 0x8561:
case 0x8562:
- strcpy(elf_platform, "z15");
+ strscpy(elf_platform, "z15");
break;
case 0x3931:
case 0x3932:
- strcpy(elf_platform, "z16");
+ strscpy(elf_platform, "z16");
+ break;
+ case 0x9175:
+ case 0x9176:
+ strscpy(elf_platform, "z17");
break;
}
return 0;
@@ -374,21 +385,3 @@ const struct seq_operations cpuinfo_op = {
.stop = c_stop,
.show = show_cpuinfo,
};
-
-int s390_isolate_bp(void)
-{
- if (!test_facility(82))
- return -EOPNOTSUPP;
- set_thread_flag(TIF_ISOLATE_BP);
- return 0;
-}
-EXPORT_SYMBOL(s390_isolate_bp);
-
-int s390_isolate_bp_guest(void)
-{
- if (!test_facility(82))
- return -EOPNOTSUPP;
- set_thread_flag(TIF_ISOLATE_BP_GUEST);
- return 0;
-}
-EXPORT_SYMBOL(s390_isolate_bp_guest);
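[Editor's note, not part of the patch: the HWCAP bits assigned in setup_hwcaps() above are consumed by userspace through the auxiliary vector. A short sketch follows; the HWCAP_S390_VXRS value is an assumption matching bit 11 of the s390 HWCAP layout in asm/elf.h.]

/* Illustrative sketch only; the bit value below is an assumption. */
#include <sys/auxv.h>
#include <stdio.h>

#ifndef HWCAP_S390_VXRS
#define HWCAP_S390_VXRS 2048	/* bit 11, see arch/s390/include/asm/elf.h */
#endif

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("vector facility: %s\n",
	       (hwcap & HWCAP_S390_VXRS) ? "available" : "not available");
	return 0;
}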
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 53e0209229f8..e1240f6b29fa 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -7,10 +7,10 @@
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
-#include "asm/ptrace.h"
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/cpufeature.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/errno.h>
@@ -24,12 +24,17 @@
#include <linux/seccomp.h>
#include <linux/compat.h>
#include <trace/syscall.h>
+#include <asm/guarded_storage.h>
+#include <asm/access-regs.h>
#include <asm/page.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
-#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include <asm/facility.h>
+#include <asm/machine.h>
+#include <asm/ptrace.h>
+#include <asm/rwonce.h>
+#include <asm/fpu.h>
#include "entry.h"
@@ -41,17 +46,24 @@ void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
- struct per_regs old, new;
union ctlreg0 cr0_old, cr0_new;
union ctlreg2 cr2_old, cr2_new;
int cr0_changed, cr2_changed;
-
- __ctl_store(cr0_old.val, 0, 0);
- __ctl_store(cr2_old.val, 2, 2);
+ union {
+ struct ctlreg regs[3];
+ struct {
+ struct ctlreg control;
+ struct ctlreg start;
+ struct ctlreg end;
+ };
+ } old, new;
+
+ local_ctl_store(0, &cr0_old.reg);
+ local_ctl_store(2, &cr2_old.reg);
cr0_new = cr0_old;
cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE) {
+ if (machine_has_tx()) {
/* Set or clear transaction execution TXC bit 8. */
cr0_new.tcx = 1;
if (task->thread.per_flags & PER_FLAG_NO_TE)
@@ -66,7 +78,7 @@ void update_cr_regs(struct task_struct *task)
}
}
/* Take care of enable/disable of guarded storage. */
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
cr2_new.gse = 0;
if (task->thread.gs_cb)
cr2_new.gse = 1;
@@ -75,38 +87,38 @@ void update_cr_regs(struct task_struct *task)
cr0_changed = cr0_new.val != cr0_old.val;
cr2_changed = cr2_new.val != cr2_old.val;
if (cr0_changed)
- __ctl_load(cr0_new.val, 0, 0);
+ local_ctl_load(0, &cr0_new.reg);
if (cr2_changed)
- __ctl_load(cr2_new.val, 2, 2);
+ local_ctl_load(2, &cr2_new.reg);
/* Copy user specified PER registers */
- new.control = thread->per_user.control;
- new.start = thread->per_user.start;
- new.end = thread->per_user.end;
+ new.control.val = thread->per_user.control;
+ new.start.val = thread->per_user.start;
+ new.end.val = thread->per_user.end;
/* merge TIF_SINGLE_STEP into user specified PER registers. */
if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
- new.control |= PER_EVENT_BRANCH;
+ new.control.val |= PER_EVENT_BRANCH;
else
- new.control |= PER_EVENT_IFETCH;
- new.control |= PER_CONTROL_SUSPENSION;
- new.control |= PER_EVENT_TRANSACTION_END;
+ new.control.val |= PER_EVENT_IFETCH;
+ new.control.val |= PER_CONTROL_SUSPENSION;
+ new.control.val |= PER_EVENT_TRANSACTION_END;
if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
- new.control |= PER_EVENT_IFETCH;
- new.start = 0;
- new.end = -1UL;
+ new.control.val |= PER_EVENT_IFETCH;
+ new.start.val = 0;
+ new.end.val = -1UL;
}
/* Take care of the PER enablement bit in the PSW. */
- if (!(new.control & PER_EVENT_MASK)) {
+ if (!(new.control.val & PER_EVENT_MASK)) {
regs->psw.mask &= ~PSW_MASK_PER;
return;
}
regs->psw.mask |= PSW_MASK_PER;
- __ctl_store(old, 9, 11);
+ __local_ctl_store(9, 11, old.regs);
if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
- __ctl_load(new, 9, 11);
+ __local_ctl_load(9, 11, new.regs);
}
void user_enable_single_step(struct task_struct *task)
@@ -238,22 +250,15 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fpu.fpc;
+ tmp = child->thread.ufpu.fpc;
tmp <<= BITS_PER_LONG - 32;
} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
- tmp = *(addr_t *)
- ((addr_t) child->thread.fpu.vxrs + 2*offset);
- else
- tmp = *(addr_t *)
- ((addr_t) child->thread.fpu.fprs + offset);
-
+ tmp = *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
@@ -385,24 +390,16 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/*
* floating point control reg. is in the thread structure
*/
- if ((unsigned int) data != 0 ||
- test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ if ((unsigned int)data != 0)
return -EINVAL;
- child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
+ child->thread.ufpu.fpc = data >> (BITS_PER_LONG - 32);
} else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
- *(addr_t *)((addr_t)
- child->thread.fpu.vxrs + 2*offset) = data;
- else
- *(addr_t *)((addr_t)
- child->thread.fpu.fprs + offset) = data;
-
+ *(addr_t *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = data;
} else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) {
/*
* Handle access to the per_info structure.
@@ -474,22 +471,20 @@ long arch_ptrace(struct task_struct *child, long request,
}
return 0;
case PTRACE_GET_LAST_BREAK:
- put_user(child->thread.last_break,
- (unsigned long __user *) data);
- return 0;
+ return put_user(child->thread.last_break, (unsigned long __user *)data);
case PTRACE_ENABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags &= ~PER_FLAG_NO_TE;
return 0;
case PTRACE_DISABLE_TE:
- if (!MACHINE_HAS_TE)
+ if (!machine_has_tx())
return -EIO;
child->thread.per_flags |= PER_FLAG_NO_TE;
child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
return 0;
case PTRACE_TE_ABORT_RAND:
- if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ if (!machine_has_tx() || (child->thread.per_flags & PER_FLAG_NO_TE))
return -EIO;
switch (data) {
case 0UL:
@@ -617,21 +612,14 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
/*
* floating point control reg. is in the thread structure
*/
- tmp = child->thread.fpu.fpc;
+ tmp = child->thread.ufpu.fpc;
} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
- tmp = *(__u32 *)
- ((addr_t) child->thread.fpu.vxrs + 2*offset);
- else
- tmp = *(__u32 *)
- ((addr_t) child->thread.fpu.fprs + offset);
-
+ tmp = *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset);
} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
@@ -743,23 +731,14 @@ static int __poke_user_compat(struct task_struct *child,
/*
* floating point control reg. is in the thread structure
*/
- if (test_fp_ctl(tmp))
- return -EINVAL;
- child->thread.fpu.fpc = data;
+ child->thread.ufpu.fpc = data;
} else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) {
/*
- * floating point regs. are either in child->thread.fpu
- * or the child->thread.fpu.vxrs array
+ * floating point regs. are in the child->thread.ufpu.vxrs array
*/
offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs);
- if (MACHINE_HAS_VX)
- *(__u32 *)((addr_t)
- child->thread.fpu.vxrs + 2*offset) = tmp;
- else
- *(__u32 *)((addr_t)
- child->thread.fpu.fprs + offset) = tmp;
-
+ *(__u32 *)((addr_t)child->thread.ufpu.vxrs + 2 * offset) = tmp;
} else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) {
/*
* Handle access to the per_info structure.
@@ -824,9 +803,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
}
return 0;
case PTRACE_GET_LAST_BREAK:
- put_user(child->thread.last_break,
- (unsigned int __user *) data);
- return 0;
+ return put_user(child->thread.last_break, (unsigned int __user *)data);
}
return compat_ptrace_request(child, request, addr, data);
}
@@ -892,10 +869,10 @@ static int s390_fpregs_get(struct task_struct *target,
_s390_fp_regs fp_regs;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
- fp_regs.fpc = target->thread.fpu.fpc;
- fpregs_store(&fp_regs, &target->thread.fpu);
+ fp_regs.fpc = target->thread.ufpu.fpc;
+ fpregs_store(&fp_regs, &target->thread.ufpu);
return membuf_write(&to, &fp_regs, sizeof(fp_regs));
}
@@ -909,23 +886,17 @@ static int s390_fpregs_set(struct task_struct *target,
freg_t fprs[__NUM_FPRS];
if (target == current)
- save_fpu_regs();
-
- if (MACHINE_HAS_VX)
- convert_vx_to_fp(fprs, target->thread.fpu.vxrs);
- else
- memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
-
- /* If setting FPC, must validate it first. */
+ save_user_fpu_regs();
+ convert_vx_to_fp(fprs, target->thread.ufpu.vxrs);
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
- u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
+ u32 ufpc[2] = { target->thread.ufpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
0, offsetof(s390_fp_regs, fprs));
if (rc)
return rc;
- if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+ if (ufpc[1] != 0)
return -EINVAL;
- target->thread.fpu.fpc = ufpc[0];
+ target->thread.ufpu.fpc = ufpc[0];
}
if (rc == 0 && count > 0)
@@ -933,12 +904,7 @@ static int s390_fpregs_set(struct task_struct *target,
fprs, offsetof(s390_fp_regs, fprs), -1);
if (rc)
return rc;
-
- if (MACHINE_HAS_VX)
- convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
- else
- memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
-
+ convert_fp_to_vx(target->thread.ufpu.vxrs, fprs);
return rc;
}
@@ -985,12 +951,12 @@ static int s390_vxrs_low_get(struct task_struct *target,
__u64 vxrs[__NUM_VXRS_LOW];
int i;
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = target->thread.ufpu.vxrs[i].low;
return membuf_write(&to, vxrs, sizeof(vxrs));
}
@@ -1002,18 +968,18 @@ static int s390_vxrs_low_set(struct task_struct *target,
__u64 vxrs[__NUM_VXRS_LOW];
int i, rc;
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = target->thread.ufpu.vxrs[i].low;
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
if (rc == 0)
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ target->thread.ufpu.vxrs[i].low = vxrs[i];
return rc;
}
@@ -1022,11 +988,11 @@ static int s390_vxrs_high_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
- return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ save_user_fpu_regs();
+ return membuf_write(&to, target->thread.ufpu.vxrs + __NUM_VXRS_LOW,
__NUM_VXRS_HIGH * sizeof(__vector128));
}
@@ -1037,13 +1003,13 @@ static int s390_vxrs_high_set(struct task_struct *target,
{
int rc;
- if (!MACHINE_HAS_VX)
+ if (!cpu_has_vx())
return -ENODEV;
if (target == current)
- save_fpu_regs();
+ save_user_fpu_regs();
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
+ target->thread.ufpu.vxrs + __NUM_VXRS_LOW, 0, -1);
return rc;
}
@@ -1070,7 +1036,7 @@ static int s390_gs_cb_get(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data)
return -ENODATA;
@@ -1087,7 +1053,7 @@ static int s390_gs_cb_set(struct task_struct *target,
struct gs_cb gs_cb = { }, *data = NULL;
int rc;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!target->thread.gs_cb) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1111,7 +1077,7 @@ static int s390_gs_cb_set(struct task_struct *target,
target->thread.gs_cb = data;
*target->thread.gs_cb = gs_cb;
if (target == current) {
- __ctl_set_bit(2, 4);
+ local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
restore_gs_cb(target->thread.gs_cb);
}
preempt_enable();
@@ -1124,7 +1090,7 @@ static int s390_gs_bc_get(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_bc_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data)
return -ENODATA;
@@ -1138,7 +1104,7 @@ static int s390_gs_bc_set(struct task_struct *target,
{
struct gs_cb *data = target->thread.gs_bc_cb;
- if (!MACHINE_HAS_GS)
+ if (!cpu_has_gs())
return -ENODEV;
if (!data) {
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -1558,13 +1524,6 @@ static const char *gpr_names[NUM_GPRS] = {
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
};
-unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
-{
- if (offset >= NUM_GPRS)
- return 0;
- return regs->gprs[offset];
-}
-
int regs_query_register_offset(const char *name)
{
unsigned long offset;
@@ -1584,29 +1543,3 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
return gpr_names[offset];
}
-
-static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
-{
- unsigned long ksp = kernel_stack_pointer(regs);
-
- return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
-}
-
-/**
- * regs_get_kernel_stack_nth() - get Nth entry of the stack
- * @regs:pt_regs which contains kernel stack pointer.
- * @n:stack entry number.
- *
- * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
- * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
- * this returns 0.
- */
-unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
-{
- unsigned long addr;
-
- addr = kernel_stack_pointer(regs) + n * sizeof(long);
- if (!regs_within_kernel_stack(regs, addr))
- return 0;
- return *(unsigned long *)addr;
-}
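[Editor's note, not part of the patch: the PTRACE_GET_LAST_BREAK change above now returns the put_user() result directly. A tracer-side sketch of the call is shown below; the request value is an assumption taken from the s390 uapi asm/ptrace.h, and the tracee is assumed to be already attached and stopped.]

/* Illustrative sketch only; request value and setup are assumptions. */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <stdio.h>

#ifndef PTRACE_GET_LAST_BREAK
#define PTRACE_GET_LAST_BREAK 0x5006	/* s390-specific request, assumption */
#endif

int read_last_break(pid_t pid)
{
	unsigned long last_break = 0;

	/* The address of the caller-provided buffer goes in the data argument. */
	if (ptrace(PTRACE_GET_LAST_BREAK, pid, NULL, &last_break) == -1) {
		perror("PTRACE_GET_LAST_BREAK");
		return -1;
	}
	printf("last breaking-event address: %#lx\n", last_break);
	return 0;
}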
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 4a22163962eb..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,6 +9,7 @@
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+#include <asm/lowcore.h>
GEN_BR_THUNK %r9
@@ -19,21 +20,16 @@
# r2 = Function to be called after store status
# r3 = Parameter for function
#
-ENTRY(store_status)
- /* Save register one and load save area base */
- stg %r1,__LC_SAVE_AREA_RESTART
+SYM_CODE_START(store_status)
+ STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
/* General purpose registers */
- lghi %r1,__LC_GPREGS_SAVE_AREA
- stmg %r0,%r15,0(%r1)
- mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
+ GET_LC %r13
/* Control registers */
- lghi %r1,__LC_CREGS_SAVE_AREA
- stctg %c0,%c15,0(%r1)
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
/* Access registers */
- lghi %r1,__LC_AREGS_SAVE_AREA
- stam %a0,%a15,0(%r1)
+ stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
/* Floating point registers */
- lghi %r1,__LC_FPREGS_SAVE_AREA
+ lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
std %f0, 0x00(%r1)
std %f1, 0x08(%r1)
std %f2, 0x10(%r1)
@@ -51,21 +47,21 @@ ENTRY(store_status)
std %f14,0x70(%r1)
std %f15,0x78(%r1)
/* Floating point control register */
- lghi %r1,__LC_FP_CREG_SAVE_AREA
+ lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
stfpc 0(%r1)
/* CPU timer */
- lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
stpt 0(%r1)
/* Store prefix register */
- lghi %r1,__LC_PREFIX_SAVE_AREA
+ lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
stpx 0(%r1)
/* Clock comparator - seven bytes */
- lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
- larl %r4,.Lclkcmp
+ larl %r4,clkcmp
stckc 0(%r4)
+ lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
mvc 1(7,%r1),1(%r4)
/* Program status word */
- lghi %r1,__LC_PSW_SAVE_AREA
+ lay %r1,__LC_PSW_SAVE_AREA(%r13)
epsw %r4,%r5
st %r4,0(%r1)
st %r5,4(%r1)
@@ -73,9 +69,9 @@ ENTRY(store_status)
lgr %r9,%r2
lgr %r2,%r3
BR_EX %r9
-ENDPROC(store_status)
+SYM_CODE_END(store_status)
.section .bss
- .align 8
-.Lclkcmp: .quad 0x0000000000000000
+ .balign 8
+SYM_DATA_LOCAL(clkcmp, .quad 0x0000000000000000)
.previous
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index a9a1a6f45375..0ae297c82afd 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -26,53 +26,51 @@
*/
.text
-ENTRY(relocate_kernel)
- basr %r13,0 # base address
- .base:
- lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
- lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
- lg %r5,0(%r2) # read another word for indirection page
- aghi %r2,8 # increment pointer
- tml %r5,0x1 # is it a destination page?
- je .indir_check # NO, goto "indir_check"
- lgr %r6,%r5 # r6 = r5
- nill %r6,0xf000 # mask it out and...
- j .base # ...next iteration
- .indir_check:
- tml %r5,0x2 # is it a indirection page?
- je .done_test # NO, goto "done_test"
- nill %r5,0xf000 # YES, mask out,
- lgr %r2,%r5 # move it into the right register,
- j .base # and read next...
- .done_test:
- tml %r5,0x4 # is it the done indicator?
- je .source_test # NO! Well, then it should be the source indicator...
- j .done # ok, lets finish it here...
- .source_test:
- tml %r5,0x8 # it should be a source indicator...
- je .base # NO, ignore it...
- lgr %r8,%r5 # r8 = r5
- nill %r8,0xf000 # masking
- 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
- jo 0b
- j .base
- .done:
- lgr %r0,%r4 # subcode
- cghi %r3,0
- je .diag
- la %r4,load_psw-.base(%r13) # load psw-address into the register
- o %r3,4(%r4) # or load address into psw
- st %r3,4(%r4)
- mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
- .diag:
- diag %r0,%r0,0x308
-ENDPROC(relocate_kernel)
+SYM_CODE_START(relocate_kernel)
+ basr %r13,0 # base address
+.base:
+ lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
+ lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
+ lg %r5,0(%r2) # read another word for indirection page
+ aghi %r2,8 # increment pointer
+ tml %r5,0x1 # is it a destination page?
+ je .indir_check # NO, goto "indir_check"
+ lgr %r6,%r5 # r6 = r5
+ nill %r6,0xf000 # mask it out and...
+ j .base # ...next iteration
+.indir_check:
+ tml %r5,0x2 # is it a indirection page?
+ je .done_test # NO, goto "done_test"
+ nill %r5,0xf000 # YES, mask out,
+ lgr %r2,%r5 # move it into the right register,
+ j .base # and read next...
+.done_test:
+ tml %r5,0x4 # is it the done indicator?
+ je .source_test # NO! Well, then it should be the source indicator...
+ j .done # ok, lets finish it here...
+.source_test:
+ tml %r5,0x8 # it should be a source indicator...
+ je .base # NO, ignore it...
+ lgr %r8,%r5 # r8 = r5
+ nill %r8,0xf000 # masking
+0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+ jo 0b
+ j .base
+.done:
+ lgr %r0,%r4 # subcode
+ cghi %r3,0
+ je .diag
+ la %r4,load_psw-.base(%r13) # load psw-address into the register
+ o %r3,4(%r4) # or load address into psw
+ st %r3,4(%r4)
+ mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
+.diag:
+ diag %r0,%r0,0x308
+SYM_CODE_END(relocate_kernel)
- .align 8
- load_psw:
- .long 0x00080000,0x80000000
- relocate_kernel_end:
- .align 8
- .globl relocate_kernel_len
- relocate_kernel_len:
- .quad relocate_kernel_end - relocate_kernel
+ .balign 8
+SYM_DATA_START_LOCAL(load_psw)
+ .long 0x00080000,0x80000000
+SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end)
+ .balign 8
+SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel)
diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c
new file mode 100644
index 000000000000..af10e6bdd34e
--- /dev/null
+++ b/arch/s390/kernel/rethook.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include "rethook.h"
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+ rh->ret_addr = regs->gprs[14];
+ rh->frame = regs->gprs[15];
+
+ /* Replace the return addr with trampoline addr */
+ regs->gprs[14] = (unsigned long)&arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+void arch_rethook_fixup_return(struct pt_regs *regs,
+ unsigned long correct_ret_addr)
+{
+ /* Replace fake return address with real one. */
+ regs->gprs[14] = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+ return rethook_trampoline_handler(regs, regs->gprs[15]);
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/* assembler function that handles the rethook must not be probed itself */
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h
new file mode 100644
index 000000000000..32f069eed3f3
--- /dev/null
+++ b/arch/s390/kernel/rethook.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __S390_RETHOOK_H
+#define __S390_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#endif
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ebad41afe355..f244c5560e7f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -52,13 +52,15 @@
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>
+#include <asm/archrandom.h>
#include <asm/boot_data.h>
+#include <asm/machine.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
@@ -73,7 +75,8 @@
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
-#include <asm/mem_detect.h>
+#include <asm/physmem_info.h>
+#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"
@@ -95,10 +98,10 @@ EXPORT_SYMBOL(console_irq);
* relocated above 2 GB, because it has to use 31 bit addresses.
* Such code and data is part of the .amode31 section.
*/
-unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
-unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
-unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
-unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
@@ -143,39 +146,41 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
-int __bootdata(noexec_disabled);
-unsigned long __bootdata(ident_map_size);
-struct mem_detect_info __bootdata(mem_detect);
-struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata_preserved(max_mappable);
+struct physmem_info __bootdata(physmem_info);
-unsigned long __bootdata_preserved(__kaslr_offset);
-unsigned long __bootdata(__amode31_base);
+struct vm_layout __bootdata_preserved(vm_layout);
+EXPORT_SYMBOL(vm_layout);
+int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
-u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
-unsigned long VMALLOC_START;
+char __bootdata(boot_rb)[PAGE_SIZE * 2];
+bool __bootdata(boot_earlyprintk);
+size_t __bootdata(boot_rb_off);
+char __bootdata(bootdebug_filter)[128];
+bool __bootdata(bootdebug);
+
+unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);
-unsigned long VMALLOC_END;
+unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);
-struct page *vmemmap;
+struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
-unsigned long vmemmap_size;
+unsigned long __bootdata_preserved(vmemmap_size);
-unsigned long MODULES_VADDR;
-unsigned long MODULES_END;
+unsigned long __bootdata_preserved(MODULES_VADDR);
+unsigned long __bootdata_preserved(MODULES_END);
/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
-DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
-
/*
* The Write Back bit position in the physaddr is given by the SLPC PCI.
* Leaving the mask zero always uses write through which is safe
@@ -245,7 +250,7 @@ static void __init conmode_default(void)
char query_buffer[1024];
char *ptr;
- if (MACHINE_IS_VM) {
+ if (machine_is_vm()) {
cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
ptr = strstr(query_buffer, "SUBCHANNEL =");
@@ -283,7 +288,7 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
- } else if (MACHINE_IS_KVM) {
+ } else if (machine_is_kvm()) {
if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
SET_CONSOLE_VT220;
else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
@@ -304,7 +309,7 @@ static void __init setup_zfcpdump(void)
return;
if (oldmem_data.start)
return;
- strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+ strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
console_loglevel = 2;
}
#else
@@ -359,63 +364,30 @@ void *restart_stack;
unsigned long stack_alloc(void)
{
-#ifdef CONFIG_VMAP_STACK
- void *ret;
+ void *stack;
- ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
- NUMA_NO_NODE, __builtin_return_address(0));
- kmemleak_not_leak(ret);
- return (unsigned long)ret;
-#else
- return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
-#endif
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+ NUMA_NO_NODE, __builtin_return_address(0));
+ kmemleak_not_leak(stack);
+ return (unsigned long)stack;
}
void stack_free(unsigned long stack)
{
-#ifdef CONFIG_VMAP_STACK
- vfree((void *) stack);
-#else
- free_pages(stack, THREAD_SIZE_ORDER);
-#endif
+ vfree((void *)stack);
}
-int __init arch_early_irq_init(void)
+static unsigned long __init stack_alloc_early(void)
{
unsigned long stack;
- stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
- if (!stack)
- panic("Couldn't allocate async stack");
- S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
- return 0;
+ stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
+ return stack;
}
-void __init arch_call_rest_init(void)
+static void __init setup_lowcore(void)
{
- unsigned long stack;
-
- stack = stack_alloc();
- if (!stack)
- panic("Couldn't allocate kernel stack");
- current->stack = (void *) stack;
-#ifdef CONFIG_VMAP_STACK
- current->stack_vm_area = (void *) stack;
-#endif
- set_task_stack_end_magic(current);
- stack += STACK_INIT_OFFSET;
- S390_lowcore.kernel_stack = stack;
- call_on_stack_noreturn(rest_init, stack);
-}
-
-static void __init setup_lowcore_dat_off(void)
-{
- unsigned long int_psw_mask = PSW_KERNEL_BITS;
- unsigned long mcck_stack;
- struct lowcore *lc;
-
- if (IS_ENABLED(CONFIG_KASAN))
- int_psw_mask |= PSW_MASK_DAT;
+ struct lowcore *lc, *abs_lc;
/*
* Setup lowcore for boot cpu
@@ -426,44 +398,40 @@ static void __init setup_lowcore_dat_off(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
- lc->restart_psw.mask = PSW_KERNEL_BITS;
- lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
+ lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+ lc->restart_psw.addr = __pa(restart_int_handler);
+ lc->external_new_psw.mask = PSW_KERNEL_BITS;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
- lc->nodat_stack = ((unsigned long) &init_thread_union)
- + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
- lc->machine_flags = S390_lowcore.machine_flags;
- lc->preempt_count = S390_lowcore.preempt_count;
+ lc->preempt_count = get_lowcore()->preempt_count;
nmi_alloc_mcesa_early(&lc->mcesad);
- lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
- lc->exit_timer = S390_lowcore.exit_timer;
- lc->user_timer = S390_lowcore.user_timer;
- lc->system_timer = S390_lowcore.system_timer;
- lc->steal_timer = S390_lowcore.steal_timer;
- lc->last_update_timer = S390_lowcore.last_update_timer;
- lc->last_update_clock = S390_lowcore.last_update_clock;
-
+ lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
+ lc->exit_timer = get_lowcore()->exit_timer;
+ lc->user_timer = get_lowcore()->user_timer;
+ lc->system_timer = get_lowcore()->system_timer;
+ lc->steal_timer = get_lowcore()->steal_timer;
+ lc->last_update_timer = get_lowcore()->last_update_timer;
+ lc->last_update_clock = get_lowcore()->last_update_clock;
/*
* Allocate the global restart stack which is the same for
- * all CPUs in cast *one* of them does a PSW restart.
+ * all CPUs in case *one* of them does a PSW restart.
*/
- restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- if (!restart_stack)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, THREAD_SIZE, THREAD_SIZE);
- restart_stack += STACK_INIT_OFFSET;
-
+ restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
+ lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+ lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+ lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
+ lc->kernel_stack = get_lowcore()->kernel_stack;
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
* restart data to the absolute zero lowcore. This is necessary if
@@ -473,47 +441,31 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
lc->restart_source = -1U;
-
- mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- if (!mcck_stack)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, THREAD_SIZE, THREAD_SIZE);
- lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
-
- /* Setup absolute zero lowcore */
- put_abs_lowcore(restart_stack, lc->restart_stack);
- put_abs_lowcore(restart_fn, lc->restart_fn);
- put_abs_lowcore(restart_data, lc->restart_data);
- put_abs_lowcore(restart_source, lc->restart_source);
- put_abs_lowcore(restart_psw, lc->restart_psw);
-
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
+ lc->user_asce = get_lowcore()->user_asce;
+
+ system_ctlreg_init_save_area(lc);
+ abs_lc = get_abs_lowcore();
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ abs_lc->program_new_psw = lc->program_new_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc);
set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
-}
-
-static void __init setup_lowcore_dat_on(void)
-{
- struct lowcore *lc = lowcore_ptr[0];
- int cr;
-
- __ctl_clear_bit(0, 28);
- S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
- __ctl_set_bit(0, 28);
- put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
- put_abs_lowcore(program_new_psw, lc->program_new_psw);
- for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
- put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+ if (abs_lowcore_map(0, lowcore_ptr[0], false))
+ panic("Couldn't setup absolute lowcore");
}
static struct resource code_resource = {
@@ -544,25 +496,22 @@ static void __init setup_resources(void)
int j;
u64 i;
- code_resource.start = (unsigned long) _text;
- code_resource.end = (unsigned long) _etext - 1;
- data_resource.start = (unsigned long) _etext;
- data_resource.end = (unsigned long) _edata - 1;
- bss_resource.start = (unsigned long) __bss_start;
- bss_resource.end = (unsigned long) __bss_stop - 1;
+ code_resource.start = __pa_symbol(_text);
+ code_resource.end = __pa_symbol(_etext) - 1;
+ data_resource.start = __pa_symbol(_etext);
+ data_resource.end = __pa_symbol(_edata) - 1;
+ bss_resource.start = __pa_symbol(__bss_start);
+ bss_resource.end = __pa_symbol(__bss_stop) - 1;
for_each_mem_range(i, &start, &end) {
- res = memblock_alloc(sizeof(*res), 8);
- if (!res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*res), 8);
+ res = memblock_alloc_or_panic(sizeof(*res), 8);
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
res->start = start;
/*
* In memblock, end points to the first byte after the
- * range while in resourses, end points to the last byte in
+ * range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
@@ -574,10 +523,7 @@ static void __init setup_resources(void)
std_res->start > res->end)
continue;
if (std_res->end > res->end) {
- sub_res = memblock_alloc(sizeof(*sub_res), 8);
- if (!sub_res)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*sub_res), 8);
+ sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
*sub_res = *std_res;
sub_res->end = res->end;
std_res->start = res->end + 1;
@@ -606,7 +552,6 @@ static void __init setup_resources(void)
static void __init setup_memory_end(void)
{
- memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size);
max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}
@@ -638,6 +583,18 @@ static struct notifier_block kdump_mem_nb = {
#endif
/*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+ unsigned long start, end;
+ struct reserved_range *range;
+
+ for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
+ memblock_reserve(start, end - start);
+}
+
+/*
* Reserve memory for kdump kernel to be loaded with kexec
*/
static void __init reserve_crashkernel(void)
@@ -647,8 +604,8 @@ static void __init reserve_crashkernel(void)
phys_addr_t low, high;
int rc;
- rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
- &crash_base);
+ rc = parse_crashkernel(boot_command_line, ident_map_size,
+ &crash_size, &crash_base, NULL, NULL);
crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
@@ -693,7 +650,7 @@ static void __init reserve_crashkernel(void)
return;
}
- if (!oldmem_data.start && MACHINE_IS_VM)
+ if (!oldmem_data.start && machine_is_vm())
diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
@@ -711,13 +668,13 @@ static void __init reserve_crashkernel(void)
*/
static void __init reserve_initrd(void)
{
-#ifdef CONFIG_BLK_DEV_INITRD
- if (!initrd_data.start || !initrd_data.size)
+ unsigned long addr, size;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
return;
- initrd_start = (unsigned long)__va(initrd_data.start);
- initrd_end = initrd_start + initrd_data.size;
- memblock_reserve(initrd_data.start, initrd_data.size);
-#endif
+ initrd_start = (unsigned long)__va(addr);
+ initrd_end = initrd_start + size;
+ memblock_reserve(addr, size);
}
/*
@@ -729,80 +686,64 @@ static void __init reserve_certificate_list(void)
memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}
-static void __init reserve_mem_detect_info(void)
+static void __init reserve_physmem_info(void)
{
- unsigned long start, size;
+ unsigned long addr, size;
- get_mem_detect_reserved(&start, &size);
- if (size)
- memblock_reserve(start, size);
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
+ memblock_reserve(addr, size);
}
-static void __init free_mem_detect_info(void)
+static void __init free_physmem_info(void)
{
- unsigned long start, size;
+ unsigned long addr, size;
- get_mem_detect_reserved(&start, &size);
- if (size)
- memblock_phys_free(start, size);
+ if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
+ memblock_phys_free(addr, size);
}
-static const char * __init get_mem_info_source(void)
-{
- switch (mem_detect.info_source) {
- case MEM_DETECT_SCLP_STOR_INFO:
- return "sclp storage info";
- case MEM_DETECT_DIAG260:
- return "diag260";
- case MEM_DETECT_SCLP_READ_INFO:
- return "sclp read info";
- case MEM_DETECT_BIN_SEARCH:
- return "binary search";
- }
- return "none";
-}
-
-static void __init memblock_add_mem_detect_info(void)
+static void __init memblock_add_physmem_info(void)
{
unsigned long start, end;
int i;
pr_debug("physmem info source: %s (%hhd)\n",
- get_mem_info_source(), mem_detect.info_source);
+ get_physmem_info_source(), physmem_info.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
- for_each_mem_detect_block(i, &start, &end) {
+ for_each_physmem_usable_range(i, &start, &end)
memblock_add(start, end - start);
+ for_each_physmem_online_range(i, &start, &end)
memblock_physmem_add(start, end - start);
- }
memblock_set_bottom_up(false);
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}
/*
- * Check for initrd being in usable memory
+ * Reserve memory used for lowcore.
*/
-static void __init check_initrd(void)
+static void __init reserve_lowcore(void)
{
-#ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_data.start && initrd_data.size &&
- !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
- pr_err("The initial RAM disk does not fit into the memory\n");
- memblock_phys_free(initrd_data.start, initrd_data.size);
- initrd_start = initrd_end = 0;
+ void *lowcore_start = get_lowcore();
+ void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+ void *start, *end;
+
+ if (absolute_pointer(__identity_base) < lowcore_end) {
+ start = max(lowcore_start, (void *)__identity_base);
+ end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+ memblock_reserve(__pa(start), __pa(end));
}
-#endif
}
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Reserve memory used for absolute lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
memblock_reserve(0, STARTUP_NORMAL_OFFSET);
memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
- memblock_reserve(__amode31_base, __eamode31 - __samode31);
+ memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
memblock_reserve(__pa(_stext), _end - _stext);
}
@@ -824,15 +765,15 @@ static void __init setup_memory(void)
static void __init relocate_amode31_section(void)
{
unsigned long amode31_size = __eamode31 - __samode31;
- long amode31_offset = __amode31_base - __samode31;
- long *ptr;
+ long amode31_offset, *ptr;
+ amode31_offset = AMODE31_START - (unsigned long)__samode31;
pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
/* Move original AMODE31 section to the new one */
- memmove((void *)__amode31_base, (void *)__samode31, amode31_size);
+ memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
/* Zero out the old AMODE31 section to catch invalid accesses within it */
- memset((void *)__samode31, 0, amode31_size);
+ memset(__samode31, 0, amode31_size);
/* Update all AMODE31 region references */
for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
@@ -851,15 +792,15 @@ static void __init setup_cr(void)
__ctl_duct[4] = (unsigned long)__ctl_duald;
/* Update control registers CR2, CR5 and CR15 */
- __ctl_store(cr2.val, 2, 2);
- __ctl_store(cr5.val, 5, 5);
- __ctl_store(cr15.val, 15, 15);
+ local_ctl_store(2, &cr2.reg);
+ local_ctl_store(5, &cr5.reg);
+ local_ctl_store(15, &cr15.reg);
cr2.ducto = (unsigned long)__ctl_duct >> 6;
cr5.pasteo = (unsigned long)__ctl_duct >> 6;
cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
- __ctl_load(cr2.val, 2, 2);
- __ctl_load(cr5.val, 5, 5);
- __ctl_load(cr15.val, 15, 15);
+ system_ctl_load(2, &cr2.reg);
+ system_ctl_load(5, &cr5.reg);
+ system_ctl_load(15, &cr15.reg);
}
/*
@@ -869,9 +810,7 @@ static void __init setup_randomness(void)
{
struct sysinfo_3_2_2 *vmms;
- vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!vmms)
- panic("Failed to allocate memory for sysinfo structure\n");
+ vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
@@ -881,22 +820,6 @@ static void __init setup_randomness(void)
}
/*
- * Find the correct size for the task_struct. This depends on
- * the size of the struct fpu at the end of the thread_struct
- * which is embedded in the task_struct.
- */
-static void __init setup_task_size(void)
-{
- int task_size = sizeof(struct task_struct);
-
- if (!MACHINE_HAS_VX) {
- task_size -= sizeof(__vector128) * __NUM_VXRS;
- task_size += sizeof(freg_t) * __NUM_FPRS;
- }
- arch_task_struct_size = task_size;
-}
-
-/*
* Issue diagnose 318 to set the control program name and
* version codes.
*/
@@ -928,7 +851,7 @@ static void __init log_component_list(void)
pr_info("Linux is running with Secure-IPL enabled\n");
else
pr_info("Linux is running with Secure-IPL disabled\n");
- ptr = (void *) early_ipl_comp_list_addr;
+ ptr = __va(early_ipl_comp_list_addr);
end = (void *) ptr + early_ipl_comp_list_size;
pr_info("The IPL report contains the following components:\n");
while (ptr < end) {
@@ -947,6 +870,23 @@ static void __init log_component_list(void)
}
/*
+ * Print buf without interpreting any '%' characters in it, taking the
+ * bootdebug option into consideration.
+ */
+static void __init print_rb_entry(const char *buf)
+{
+ char fmt[] = KERN_SOH "0boot: %s";
+ int level = printk_get_level(buf);
+
+ buf = skip_timestamp(printk_skip_level(buf));
+ if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
+ return;
+
+ fmt[1] = level;
+ printk(fmt, buf);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -956,15 +896,21 @@ void __init setup_arch(char **cmdline_p)
/*
* print what head.S has found out about the machine
*/
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
pr_info("Linux is running as a z/VM "
"guest operating system in 64-bit mode\n");
- else if (MACHINE_IS_KVM)
+ else if (machine_is_kvm())
pr_info("Linux is running under KVM in 64-bit mode\n");
- else if (MACHINE_IS_LPAR)
+ else if (machine_is_lpar())
pr_info("Linux is running natively in 64-bit mode\n");
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ /* Print decompressor messages if not already printed */
+ if (!boot_earlyprintk)
+ boot_rb_foreach(print_rb_entry);
+
+ if (machine_has_relocated_lowcore())
+ pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
log_component_list();
@@ -988,21 +934,22 @@ void __init setup_arch(char **cmdline_p)
os_info_init();
setup_ipl();
- setup_task_size();
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
+ reserve_pgtables();
+ reserve_lowcore();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
- reserve_mem_detect_info();
+ reserve_physmem_info();
memblock_set_current_limit(ident_map_size);
memblock_allow_resize();
/* Get information about *all* installed memory */
- memblock_add_mem_detect_info();
+ memblock_add_physmem_info();
- free_mem_detect_info();
+ free_physmem_info();
setup_memory_end();
memblock_dump_all();
setup_memory();
@@ -1012,33 +959,29 @@ void __init setup_arch(char **cmdline_p)
setup_uv();
dma_contiguous_reserve(ident_map_size);
vmcp_cma_reserve();
- if (MACHINE_HAS_EDAT2)
+ if (cpu_has_edat2())
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
- check_initrd();
reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
/*
- * Be aware that smp_save_dump_cpus() triggers a system reset.
+ * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
* Therefore CPU and device initialization should be done afterwards.
*/
- smp_save_dump_cpus();
+ smp_save_dump_secondary_cpus();
#endif
setup_resources();
- setup_lowcore_dat_off();
+ setup_lowcore();
smp_fill_possible_mask();
cpu_detect_mhz_feature();
cpu_init();
numa_setup();
smp_detect_cpus();
topology_init_early();
-
- if (test_facility(193))
- static_branch_enable(&cpu_has_bear);
-
+ setup_protection_map();
/*
- * Create kernel page tables and switch to virtual addressing.
+ * Create kernel page tables.
*/
paging_init();
@@ -1046,7 +989,9 @@ void __init setup_arch(char **cmdline_p)
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
- setup_lowcore_dat_on();
+#ifdef CONFIG_CRASH_DUMP
+ smp_save_dump_ipl_cpu();
+#endif
/* Setup default console */
conmode_default();
@@ -1062,3 +1007,8 @@ void __init setup_arch(char **cmdline_p)
/* Add system specific data to the random pool */
setup_randomness();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ sclp_init();
+}
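
arch_cpu_finalize_init() overrides a weak hook that the generic boot code calls
once architecture setup is done; a sketch of the fallback being replaced (its
exact location in init/main.c is an assumption, not part of this patch):

	void __init __weak arch_cpu_finalize_init(void)
	{
	}
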
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 38258f817048..e48013cd832c 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/rseq.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
@@ -29,9 +30,9 @@
#include <linux/compat.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
+#include <asm/vdso-symbols.h>
+#include <asm/access-regs.h>
#include <asm/lowcore.h>
-#include <asm/switch_to.h>
-#include <asm/vdso.h>
#include "entry.h"
/*
@@ -108,7 +109,7 @@ struct rt_sigframe
static void store_sigregs(void)
{
save_access_regs(current->thread.acrs);
- save_fpu_regs();
+ save_user_fpu_regs();
}
/* Load registers after signal return */
@@ -130,7 +131,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
+ fpregs_store(&user_sregs.fpregs, &current->thread.ufpu);
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
return -EFAULT;
return 0;
@@ -149,10 +150,6 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
return -EINVAL;
- /* Test the floating-point-control word. */
- if (test_fp_ctl(user_sregs.fpregs.fpc))
- return -EINVAL;
-
/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
(user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
@@ -168,7 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
+ fpregs_load(&user_sregs.fpregs, &current->thread.ufpu);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
@@ -182,13 +179,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
int i;
/* Save vector registers to signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
for (i = 0; i < __NUM_VXRS_LOW; i++)
- vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
+ vxrs[i] = current->thread.ufpu.vxrs[i].low;
if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
sizeof(sregs_ext->vxrs_low)) ||
__copy_to_user(&sregs_ext->vxrs_high,
- current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
}
@@ -202,15 +199,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
int i;
/* Restore vector registers from signal stack */
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
sizeof(sregs_ext->vxrs_low)) ||
- __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW,
&sregs_ext->vxrs_high,
sizeof(sregs_ext->vxrs_high)))
return -EFAULT;
for (i = 0; i < __NUM_VXRS_LOW; i++)
- *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
+ current->thread.ufpu.vxrs[i].low = vxrs[i];
}
return 0;
}
@@ -225,7 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
goto badframe;
set_current_blocked(&set);
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -249,7 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
- save_fpu_regs();
+ save_user_fpu_regs();
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
@@ -300,7 +297,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
* included in the signal frame on a 31-bit system.
*/
frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
- if (MACHINE_HAS_VX)
+ if (cpu_has_vx())
frame_size += sizeof(frame->sregs_ext);
frame = get_sigframe(ka, regs, frame_size);
if (frame == (void __user *) -1UL)
@@ -377,7 +374,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
* included in the signal frame on a 31-bit system.
*/
uc_flags = 0;
- if (MACHINE_HAS_VX) {
+ if (cpu_has_vx()) {
frame_size += sizeof(_sigregs_ext);
uc_flags |= UC_VXRS;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 30c91d565933..81f12bb77f62 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -18,6 +18,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/export.h>
@@ -36,16 +37,20 @@
#include <linux/sched/task_stack.h>
#include <linux/crash_dump.h>
#include <linux/kprobes.h>
+#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
+#include <asm/machine.h>
+#include <asm/ctlreg.h>
+#include <asm/pfault.h>
#include <asm/diag.h>
-#include <asm/switch_to.h>
#include <asm/facility.h>
+#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
@@ -55,6 +60,7 @@
#include <asm/stacktrace.h>
#include <asm/topology.h>
#include <asm/vdso.h>
+#include <asm/maccess.h>
#include "entry.h"
enum {
@@ -70,18 +76,15 @@ enum {
CPU_STATE_CONFIGURED,
};
-static DEFINE_PER_CPU(struct cpu *, cpu_device);
-
-struct pcpu {
- unsigned long ec_mask; /* bit mask for ec_xxx functions */
- unsigned long ec_clk; /* sigp timestamp for ec_xxx */
- signed char state; /* physical cpu state */
- signed char polarization; /* physical polarization */
- u16 address; /* physical cpu address */
-};
-
static u8 boot_core_type;
-static struct pcpu pcpu_devices[NR_CPUS];
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. In that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non-existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
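
With pcpu_devices converted from a static NR_CPUS array to a per-CPU variable,
the accessors change accordingly throughout this file. The new idioms, shown
against the old array arithmetic (illustrative sketch only):

	struct pcpu *pcpu;

	pcpu = per_cpu_ptr(&pcpu_devices, cpu);	/* was: pcpu_devices + cpu */
	pcpu = this_cpu_ptr(&pcpu_devices);	/* was: pcpu_devices + smp_processor_id() */
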
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -96,13 +99,6 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
static unsigned int smp_max_threads __initdata = -1U;
cpumask_t cpu_setup_mask;
-static int __init early_nosmt(char *s)
-{
- smp_max_threads = 1;
- return 0;
-}
-early_param("nosmt", early_nosmt);
-
static int __init early_smt(char *s)
{
get_option(&s, &smp_max_threads);
@@ -112,7 +108,7 @@ early_param("smt", early_smt);
/*
* The smp_cpu_state_mutex must be held when changing the state or polarization
- * member of a pcpu data structure within the pcpu_devices arreay.
+ * member of a pcpu data structure within the pcpu_devices array.
*/
DEFINE_MUTEX(smp_cpu_state_mutex);
@@ -172,8 +168,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
int cpu;
for_each_cpu(cpu, mask)
- if (pcpu_devices[cpu].address == address)
- return pcpu_devices + cpu;
+ if (per_cpu(pcpu_devices, cpu).address == address)
+ return &per_cpu(pcpu_devices, cpu);
return NULL;
}
@@ -199,7 +195,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
mcck_stack = stack_alloc();
if (!lc || !nodat_stack || !async_stack || !mcck_stack)
goto out;
- memcpy(lc, &S390_lowcore, 512);
+ memcpy(lc, get_lowcore(), 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + STACK_INIT_OFFSET;
lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
@@ -212,10 +208,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_mcesa(&lc->mcesad))
goto out;
+ if (abs_lowcore_map(cpu, lc, true))
+ goto out_mcesa;
lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
return 0;
+out_mcesa:
+ nmi_free_mcesa(&lc->mcesad);
out:
stack_free(mcck_stack);
stack_free(async_stack);
@@ -224,19 +224,18 @@ out:
return -ENOMEM;
}
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
async_stack = lc->async_stack - STACK_INIT_OFFSET;
mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[cpu] = NULL;
+ abs_lowcore_unmap(cpu);
nmi_free_mcesa(&lc->mcesad);
stack_free(async_stack);
stack_free(mcck_stack);
@@ -246,37 +245,37 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
- struct lowcore *lc = lowcore_ptr[cpu];
+ struct lowcore *lc, *abs_lc;
+ lc = lowcore_ptr[cpu];
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->pcpu = (unsigned long)pcpu;
lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
- lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
lc->user_asce = s390_invalid_asce;
- lc->machine_flags = S390_lowcore.machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
- __ctl_store(lc->cregs_save_area, 0, 15);
- lc->cregs_save_area[1] = lc->kernel_asce;
+ abs_lc = get_abs_lowcore();
+ memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
+ put_abs_lowcore(abs_lc);
+ lc->cregs_save_area[1] = lc->user_asce;
lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
arch_spin_lock_setup(cpu);
}
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
- lc->kernel_stack = (unsigned long) task_stack_page(tsk)
- + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->current_task = (unsigned long) tsk;
+ lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
+ lc->current_task = (unsigned long)tsk;
lc->lpp = LPP_MAGIC;
lc->current_pid = tsk->pid;
lc->user_timer = tsk->thread.user_timer;
@@ -287,18 +286,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
lc->steal_timer = 0;
}
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1U;
- pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+ pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
typedef void (pcpu_delegate_fn)(void *);
@@ -311,20 +308,23 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
-static void pcpu_delegate(struct pcpu *pcpu,
+static void pcpu_delegate(struct pcpu *pcpu, int cpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
- struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
- unsigned int source_cpu = stap();
+ struct lowcore *lc, *abs_lc;
+ unsigned int source_cpu;
+
+ lc = lowcore_ptr[cpu];
+ source_cpu = stap();
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
pcpu_delegate_fn *, func, void *, data);
}
/* Stop target cpu (if func returns this stops the current cpu). */
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
/* Restart func on the target cpu and stop the current cpu. */
if (lc) {
lc->restart_stack = stack;
@@ -332,12 +332,13 @@ static void pcpu_delegate(struct pcpu *pcpu,
lc->restart_data = (unsigned long)data;
lc->restart_source = source_cpu;
} else {
- put_abs_lowcore(restart_stack, stack);
- put_abs_lowcore(restart_fn, (unsigned long)func);
- put_abs_lowcore(restart_data, (unsigned long)data);
- put_abs_lowcore(restart_source, source_cpu);
+ abs_lc = get_abs_lowcore();
+ abs_lc->restart_stack = stack;
+ abs_lc->restart_fn = (unsigned long)func;
+ abs_lc->restart_data = (unsigned long)data;
+ abs_lc->restart_source = source_cpu;
+ put_abs_lowcore(abs_lc);
}
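
The hunk above also switches from the per-field put_abs_lowcore(field, value)
helpers to a map/unmap pair around ordinary structure accesses. The general
pattern, as used in this patch (sketch, field names taken from the hunk):

	struct lowcore *abs_lc;

	abs_lc = get_abs_lowcore();		/* map the absolute (prefix 0) lowcore */
	abs_lc->restart_stack = stack;
	abs_lc->restart_fn = (unsigned long)func;
	put_abs_lowcore(abs_lc);		/* drop the mapping again */
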
- __bpon();
asm volatile(
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
" brc 2,0b # busy, try again\n"
@@ -364,38 +365,22 @@ static int pcpu_set_smt(unsigned int mtid)
smp_cpu_mt_shift = 0;
while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
smp_cpu_mt_shift++;
- pcpu_devices[0].address = stap();
+ per_cpu(pcpu_devices, 0).address = stap();
}
return cc;
}
/*
- * Call function on an online CPU.
- */
-void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- struct pcpu *pcpu;
-
- /* Use the current cpu if it is online. */
- pcpu = pcpu_find_address(cpu_online_mask, stap());
- if (!pcpu)
- /* Use the first online cpu. */
- pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
- pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
-}
-
-/*
* Call function on the ipl CPU.
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = lowcore_ptr[0];
- if (pcpu_devices[0].address == stap())
- lc = &S390_lowcore;
+ if (ipl_pcpu->address == stap())
+ lc = get_lowcore();
- pcpu_delegate(&pcpu_devices[0], func, data,
- lc->nodat_stack);
+ pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -403,21 +388,21 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
- if (pcpu_devices[cpu].address == address)
+ if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
void schedule_mcck_handler(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
- if (pcpu_running(pcpu_devices + cpu))
+ if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
return false;
return true;
}
@@ -425,11 +410,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
void notrace smp_yield_cpu(int cpu)
{
- if (!MACHINE_HAS_DIAG9C)
+ if (!machine_has_diag9c())
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
+ : : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);
@@ -450,7 +435,7 @@ void notrace smp_emergency_stop(void)
end = get_tod_clock() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
@@ -459,7 +444,7 @@ void notrace smp_emergency_stop(void)
}
while (get_tod_clock() < end) {
for_each_cpu(cpu, &cpumask)
- if (pcpu_stopped(pcpu_devices + cpu))
+ if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
if (cpumask_empty(&cpumask))
break;
@@ -474,10 +459,11 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
*/
void smp_send_stop(void)
{
+ struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS);
trace_hardirqs_off();
debug_set_critical();
@@ -489,8 +475,9 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
- while (!pcpu_stopped(pcpu_devices + cpu))
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@@ -504,7 +491,7 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
- bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
@@ -512,7 +499,7 @@ static void smp_handle_ext_call(void)
if (test_bit(ec_call_function_single, &bits))
generic_smp_call_function_single_interrupt();
if (test_bit(ec_mcck_pending, &bits))
- __s390_handle_mcck();
+ s390_handle_mcck();
if (test_bit(ec_irq_work, &bits))
irq_work_run();
}
@@ -529,12 +516,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
/*
@@ -542,63 +529,18 @@ void arch_send_call_function_single_ipi(int cpu)
* it goes straight through and wastes no time serializing
* anything. Worst case is that we lose a reschedule ...
*/
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif
-/*
- * parameter area for the set/clear control bit callbacks
- */
-struct ec_creg_mask_parms {
- unsigned long orval;
- unsigned long andval;
- int cr;
-};
-
-/*
- * callback for setting/clearing control bits
- */
-static void smp_ctl_bit_callback(void *info)
-{
- struct ec_creg_mask_parms *pp = info;
- unsigned long cregs[16];
-
- __ctl_store(cregs, 0, 15);
- cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
- __ctl_load(cregs, 0, 15);
-}
-
-static DEFINE_SPINLOCK(ctl_lock);
-
-void smp_ctl_set_clear_bit(int cr, int bit, bool set)
-{
- struct ec_creg_mask_parms parms = { .cr = cr, };
- u64 ctlreg;
-
- if (set) {
- parms.orval = 1UL << bit;
- parms.andval = -1UL;
- } else {
- parms.orval = 0;
- parms.andval = ~(1UL << bit);
- }
- spin_lock(&ctl_lock);
- get_abs_lowcore(ctlreg, cregs_save_area[cr]);
- ctlreg = (ctlreg & parms.andval) | parms.orval;
- put_abs_lowcore(cregs_save_area[cr], ctlreg);
- spin_unlock(&ctl_lock);
- on_each_cpu(smp_ctl_bit_callback, &parms, 1);
-}
-EXPORT_SYMBOL(smp_ctl_set_clear_bit);
-
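
The removed smp_ctl_set_clear_bit() helper is superseded by the ctlreg API; the
smp_prepare_cpus() hunk further down calls system_ctl_set_bit() directly.
Roughly equivalent calls (system_ctl_clear_bit() as the counterpart is an
assumption, it does not appear in this patch):

	system_ctl_set_bit(0, 14);	/* was: smp_ctl_set_clear_bit(0, 14, true)  */
	system_ctl_clear_bit(0, 14);	/* was: smp_ctl_set_clear_bit(0, 14, false) */
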
#ifdef CONFIG_CRASH_DUMP
int smp_store_status(int cpu)
@@ -607,16 +549,16 @@ int smp_store_status(int cpu)
struct pcpu *pcpu;
unsigned long pa;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
lc = lowcore_ptr[cpu];
pa = __pa(&lc->floating_pt_save_area);
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
+ if (!cpu_has_vx() && !cpu_has_gs())
return 0;
pa = lc->mcesad & MCESA_ORIGIN_MASK;
- if (MACHINE_HAS_GS)
+ if (cpu_has_gs())
pa |= lc->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
@@ -626,7 +568,7 @@ int smp_store_status(int cpu)
/*
* Collect CPU state of the previous, crashed system.
- * There are four cases:
+ * There are three cases:
* 1) standard zfcp/nvme dump
* condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
@@ -639,46 +581,45 @@ int smp_store_status(int cpu)
* with sigp stop-and-store-status. The firmware or the boot-loader
* stored the registers of the boot CPU in the absolute lowcore in the
* memory of the old system.
- * 3) kdump and the old kernel did not store the CPU state,
- * or stand-alone kdump for DASD
- * condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * 3) kdump or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The kexec code or the boot-loader
* stored the registers of the boot CPU in the memory of the old system.
- * 4) kdump and the old kernel stored the CPU state
- * condition: OLDMEM_BASE != NULL && is_kdump_kernel()
- * This case does not exist for s390 anymore, setup_arch explicitly
- * deactivates the elfcorehdr= kernel parameter
+ *
+ * Note that the legacy kdump mode where the old kernel stored the CPU states
+ * no longer exists: setup_arch() explicitly deactivates the elfcorehdr=
+ * kernel parameter. The is_kdump_kernel() implementation on s390 is independent
+ * of the elfcorehdr= parameter.
*/
-static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, __vector128 *vxrs)
+static bool dump_available(void)
{
- if (is_boot_cpu)
- vxrs = boot_cpu_vector_save_area;
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
- save_area_add_vxrs(sa, vxrs);
+ return oldmem_data.start || is_ipl_type_dump();
}
-static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
- bool is_boot_cpu, void *regs)
+void __init smp_save_dump_ipl_cpu(void)
{
- if (is_boot_cpu)
- copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
- else
- __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
+ struct save_area *sa;
+ void *regs;
+
+ if (!dump_available())
+ return;
+ sa = save_area_alloc(true);
+ regs = memblock_alloc_or_panic(512, 8);
+ copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
save_area_add_regs(sa, regs);
+ memblock_free(regs, 512);
+ if (cpu_has_vx())
+ save_area_add_vxrs(sa, boot_cpu_vector_save_area);
}
-void __init smp_save_dump_cpus(void)
+void __init smp_save_dump_secondary_cpus(void)
{
int addr, boot_cpu_addr, max_cpu_addr;
struct save_area *sa;
- bool is_boot_cpu;
void *page;
- if (!(oldmem_data.start || is_ipl_type_dump()))
- /* No previous system present, normal boot. */
+ if (!dump_available())
return;
/* Allocate a page as dumping area for the store status sigps */
page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
@@ -691,26 +632,18 @@ void __init smp_save_dump_cpus(void)
boot_cpu_addr = stap();
max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
for (addr = 0; addr <= max_cpu_addr; addr++) {
+ if (addr == boot_cpu_addr)
+ continue;
if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- is_boot_cpu = (addr == boot_cpu_addr);
- /* Allocate save area */
- sa = save_area_alloc(is_boot_cpu);
- if (!sa)
- panic("could not allocate memory for save area\n");
- if (MACHINE_HAS_VX)
- /* Get the vector registers */
- smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
- /*
- * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
- * of the boot CPU are stored in the HSA. To retrieve
- * these registers an SCLP request is required which is
- * done by drivers/s390/char/zcore.c:init_cpu_info()
- */
- if (!is_boot_cpu || oldmem_data.start)
- /* Get the CPU registers */
- smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+ sa = save_area_alloc(false);
+ __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+ save_area_add_regs(sa, page);
+ if (cpu_has_vx()) {
+ __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+ save_area_add_vxrs(sa, page);
+ }
}
memblock_free(page, PAGE_SIZE);
diag_amode31_ops.diag308_reset();
@@ -720,17 +653,36 @@ void __init smp_save_dump_cpus(void)
void smp_cpu_set_polarization(int cpu, int val)
{
- pcpu_devices[cpu].polarization = val;
+ per_cpu(pcpu_devices, cpu).polarization = val;
}
int smp_cpu_get_polarization(int cpu)
{
- return pcpu_devices[cpu].polarization;
+ return per_cpu(pcpu_devices, cpu).polarization;
+}
+
+void smp_cpu_set_capacity(int cpu, unsigned long val)
+{
+ per_cpu(pcpu_devices, cpu).capacity = val;
+}
+
+unsigned long smp_cpu_get_capacity(int cpu)
+{
+ return per_cpu(pcpu_devices, cpu).capacity;
+}
+
+void smp_set_core_capacity(int cpu, unsigned long val)
+{
+ int i;
+
+ cpu = smp_get_base_cpu(cpu);
+ for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
+ smp_cpu_set_capacity(i, val);
}
int smp_cpu_get_cpu_address(int cpu)
{
- return pcpu_devices[cpu].address;
+ return per_cpu(pcpu_devices, cpu).address;
}
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@@ -754,8 +706,6 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
}
}
-static int smp_add_present_cpu(int cpu);
-
static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
bool configured, bool early)
{
@@ -771,15 +721,16 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu->address = address + i;
if (configured)
pcpu->state = CPU_STATE_CONFIGURED;
else
pcpu->state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
set_cpu_present(cpu, true);
- if (!early && smp_add_present_cpu(cpu) != 0)
+ if (!early && arch_register_cpu(cpu))
set_cpu_present(cpu, false);
else
nr++;
@@ -806,7 +757,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
* that all SMT threads get subsequent logical CPU numbers.
*/
if (early) {
- core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
for (i = 0; i < info->configured; i++) {
core = &info->core[i];
if (core->core_id == core_id) {
@@ -831,10 +782,7 @@ void __init smp_detect_cpus(void)
u16 address;
/* Get CPU information */
- info = memblock_alloc(sizeof(*info), 8);
- if (!info)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*info), 8);
+ info = memblock_alloc_or_panic(sizeof(*info), 8);
smp_get_core_info(info, 1);
/* Find boot CPU type */
if (sclp.has_core_type) {
@@ -853,6 +801,7 @@ void __init smp_detect_cpus(void)
mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
pcpu_set_smt(mtid);
+ cpu_smt_set_num_threads(smp_cpu_mtid + 1, smp_cpu_mtid + 1);
/* Print number of CPUs */
c_cpus = s_cpus = 0;
@@ -866,9 +815,6 @@ void __init smp_detect_cpus(void)
s_cpus += smp_cpu_mtid + 1;
}
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
-
- /* Add CPUs present at boot */
- __smp_rescan_cpus(info, true);
memblock_free(info, sizeof(*info));
}
@@ -877,17 +823,18 @@ void __init smp_detect_cpus(void)
*/
static void smp_start_secondary(void *cpuvoid)
{
+ struct lowcore *lc = get_lowcore();
int cpu = raw_smp_processor_id();
- S390_lowcore.last_update_clock = get_tod_clock();
- S390_lowcore.restart_stack = (unsigned long)restart_stack;
- S390_lowcore.restart_fn = (unsigned long)do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1U;
- S390_lowcore.restart_flags = 0;
- restore_access_regs(S390_lowcore.access_regs_save_area);
+ lc->last_update_clock = get_tod_clock();
+ lc->restart_stack = (unsigned long)restart_stack;
+ lc->restart_fn = (unsigned long)do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1U;
+ lc->restart_flags = 0;
+ restore_access_regs(lc->access_regs_save_area);
cpu_init();
- rcu_cpu_starting(cpu);
+ rcutree_report_cpu_starting(cpu);
init_cpu_timer();
vtime_init();
vdso_getcpu_init();
@@ -908,7 +855,7 @@ static void smp_start_secondary(void *cpuvoid)
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
if (pcpu->state != CPU_STATE_CONFIGURED)
@@ -920,12 +867,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
rc = pcpu_alloc_lowcore(pcpu, cpu);
if (rc)
return rc;
+ /*
+ * Make sure global control register contents do not change
+ * until new CPU has initialized control registers.
+ */
+ system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
- pcpu_attach_task(pcpu, tidle);
- pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ pcpu_attach_task(cpu, tidle);
+ pcpu_start_fn(cpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
+ system_ctlreg_unlock();
return 0;
}
@@ -940,7 +893,7 @@ early_param("possible_cpus", _setup_possible_cpus);
int __cpu_disable(void)
{
- unsigned long cregs[16];
+ struct ctlreg cregs[16];
int cpu;
/* Handle possible pending IPIs */
@@ -952,11 +905,11 @@ int __cpu_disable(void)
/* Disable pseudo page faults on this cpu. */
pfault_fini();
/* Disable interrupt sources via control register. */
- __ctl_store(cregs, 0, 15);
- cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
- cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
- cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
- __ctl_load(cregs, 0, 15);
+ __local_ctl_store(0, 15, cregs);
+ cregs[0].val &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6].val &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14].val &= ~0x1f000000UL; /* disable most machine checks */
+ __local_ctl_load(0, 15, cregs);
clear_cpu_flag(CIF_NOHZ_DELAY);
return 0;
}
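
__cpu_disable() now works on struct ctlreg values instead of bare unsigned
longs, which is why the interrupt masks are applied to cregs[n].val. The
wrapper type is assumed to be a single-member struct along these lines:

	struct ctlreg {
		unsigned long val;
	};
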
@@ -966,19 +919,19 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
- pcpu_free_lowcore(pcpu);
+ pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- __bpon();
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
@@ -997,30 +950,36 @@ void __init smp_fill_possible_mask(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- /* request the 0x1201 emergency signal external interrupt */
if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
- /* request the 0x1202 external call external interrupt */
+ system_ctl_set_bit(0, 14);
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
+ system_ctl_set_bit(0, 13);
+ smp_rescan_cpus(true);
}
void __init smp_prepare_boot_cpu(void)
{
- struct pcpu *pcpu = pcpu_devices;
+ struct lowcore *lc = get_lowcore();
WARN_ON(!cpu_present(0) || !cpu_online(0));
- pcpu->state = CPU_STATE_CONFIGURED;
- S390_lowcore.percpu_offset = __per_cpu_offset[0];
+ lc->percpu_offset = __per_cpu_offset[0];
+ ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+ ipl_pcpu->state = CPU_STATE_CONFIGURED;
+ lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
}
void __init smp_setup_processor_id(void)
{
- pcpu_devices[0].address = stap();
- S390_lowcore.cpu_nr = 0;
- S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
- S390_lowcore.spinlock_index = 0;
+ struct lowcore *lc = get_lowcore();
+
+ lc->cpu_nr = 0;
+ per_cpu(pcpu_devices, 0).address = stap();
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
}
/*
@@ -1040,7 +999,7 @@ static ssize_t cpu_configure_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -1060,15 +1019,13 @@ static ssize_t cpu_configure_store(struct device *dev,
cpus_read_lock();
mutex_lock(&smp_cpu_state_mutex);
rc = -EBUSY;
- /* disallow configuration changes of online cpus and cpu 0 */
+ /* disallow configuration changes of online cpus */
cpu = dev->id;
cpu = smp_get_base_cpu(cpu);
- if (cpu == 0)
- goto out;
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
@@ -1080,7 +1037,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_STANDBY;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1095,7 +1052,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_CONFIGURED;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1114,7 +1071,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+ return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@@ -1140,35 +1097,34 @@ static struct attribute_group cpu_online_attr_group = {
static int smp_cpu_online(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
return 0;
}
-static int smp_add_present_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
+{
+ return !!cpu;
+}
+
+int arch_register_cpu(int cpu)
{
- struct device *s;
- struct cpu *c;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
- per_cpu(cpu_device, cpu) = c;
- s = &c->dev;
- c->hotpluggable = 1;
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
rc = register_cpu(c, cpu);
if (rc)
goto out;
- rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
if (rc)
goto out_cpu;
rc = topology_cpu_init(c);
@@ -1177,14 +1133,14 @@ static int smp_add_present_cpu(int cpu)
return 0;
out_topology:
- sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
out_cpu:
unregister_cpu(c);
out:
return rc;
}
-int __ref smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(bool early)
{
struct sclp_core_info *info;
int nr;
@@ -1193,7 +1149,7 @@ int __ref smp_rescan_cpus(void)
if (!info)
return -ENOMEM;
smp_get_core_info(info, 0);
- nr = __smp_rescan_cpus(info, false);
+ nr = __smp_rescan_cpus(info, early);
kfree(info);
if (nr)
topology_schedule_update();
@@ -1210,7 +1166,7 @@ static ssize_t __ref rescan_store(struct device *dev,
rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
- rc = smp_rescan_cpus();
+ rc = smp_rescan_cpus(false);
unlock_device_hotplug();
return rc ? rc : count;
}
@@ -1218,77 +1174,19 @@ static DEVICE_ATTR_WO(rescan);
static int __init s390_smp_init(void)
{
- int cpu, rc = 0;
+ struct device *dev_root;
+ int rc;
- rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
- if (rc)
- return rc;
- for_each_present_cpu(cpu) {
- rc = smp_add_present_cpu(cpu);
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ rc = device_create_file(dev_root, &dev_attr_rescan);
+ put_device(dev_root);
if (rc)
- goto out;
+ return rc;
}
-
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
smp_cpu_online, smp_cpu_pre_down);
rc = rc <= 0 ? rc : 0;
-out:
return rc;
}
subsys_initcall(s390_smp_init);
-
-static __always_inline void set_new_lowcore(struct lowcore *lc)
-{
- union register_pair dst, src;
- u32 pfx;
-
- src.even = (unsigned long) &S390_lowcore;
- src.odd = sizeof(S390_lowcore);
- dst.even = (unsigned long) lc;
- dst.odd = sizeof(*lc);
- pfx = __pa(lc);
-
- asm volatile(
- " mvcl %[dst],%[src]\n"
- " spx %[pfx]\n"
- : [dst] "+&d" (dst.pair), [src] "+&d" (src.pair)
- : [pfx] "Q" (pfx)
- : "memory", "cc");
-}
-
-static int __init smp_reinit_ipl_cpu(void)
-{
- unsigned long async_stack, nodat_stack, mcck_stack;
- struct lowcore *lc, *lc_ipl;
- unsigned long flags, cr0;
- u64 mcesad;
-
- lc_ipl = lowcore_ptr[0];
- lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
- async_stack = stack_alloc();
- mcck_stack = stack_alloc();
- if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad))
- panic("Couldn't allocate memory");
-
- local_irq_save(flags);
- local_mcck_disable();
- set_new_lowcore(lc);
- S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET;
- S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET;
- S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET;
- __ctl_store(cr0, 0, 0);
- __ctl_clear_bit(0, 28); /* disable lowcore protection */
- S390_lowcore.mcesad = mcesad;
- __ctl_load(cr0, 0, 0);
- lowcore_ptr[0] = lc;
- local_mcck_enable();
- local_irq_restore(flags);
-
- free_pages(lc_ipl->async_stack - STACK_INIT_OFFSET, THREAD_SIZE_ORDER);
- memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE);
- memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl));
-
- return 0;
-}
-early_initcall(smp_reinit_ipl_cpu);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 7ee455e8e3d5..b153a395f46d 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -5,10 +5,15 @@
* Copyright IBM Corp. 2006
*/
+#include <linux/perf_event.h>
#include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/kprobes.h>
+#include <asm/ptrace.h>
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
@@ -40,12 +45,12 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
if (!addr)
return -EINVAL;
-#ifdef CONFIG_KPROBES
+#ifdef CONFIG_RETHOOK
/*
- * Mark stacktraces with kretprobed functions on them
+ * Mark stacktraces with rethook functions on them
* as unreliable.
*/
- if (state.ip == (unsigned long)__kretprobe_trampoline)
+ if (state.ip == (unsigned long)arch_rethook_trampoline)
return -EINVAL;
#endif
@@ -58,3 +63,103 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
return -EINVAL;
return 0;
}
+
+static inline bool store_ip(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry, bool perf,
+ unsigned long ip)
+{
+#ifdef CONFIG_PERF_EVENTS
+ if (perf) {
+ if (perf_callchain_store(entry, ip))
+ return false;
+ return true;
+ }
+#endif
+ return consume_entry(cookie, ip);
+}
+
+static inline bool ip_invalid(unsigned long ip)
+{
+ /*
+ * Perform some basic checks if an instruction address taken
+ * from unreliable source is invalid.
+ */
+ if (ip & 1)
+ return true;
+ if (ip < mmap_min_addr)
+ return true;
+ if (ip >= current->mm->context.asce_limit)
+ return true;
+ return false;
+}
+
+static inline bool ip_within_vdso(unsigned long ip)
+{
+ return in_range(ip, current->mm->context.vdso_base, vdso_text_size());
+}
+
+void arch_stack_walk_user_common(stack_trace_consume_fn consume_entry, void *cookie,
+ struct perf_callchain_entry_ctx *entry,
+ const struct pt_regs *regs, bool perf)
+{
+ struct stack_frame_vdso_wrapper __user *sf_vdso;
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ if (!current->mm)
+ return;
+ ip = instruction_pointer(regs);
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
+ return;
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (1) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ /*
+ * VDSO entry code has a non-standard stack frame layout.
+ * See VDSO user wrapper code for details.
+ */
+ if (!sp && ip_within_vdso(ip)) {
+ sf_vdso = (void __user *)sf;
+ if (__get_user(ip, &sf_vdso->return_address))
+ break;
+ sp = (unsigned long)sf + STACK_FRAME_VDSO_OVERHEAD;
+ sf = (void __user *)sp;
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ } else {
+ sf = (void __user *)sp;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ }
+ /* Sanity check: ABI requires SP to be 8 byte aligned. */
+ if (sp & 0x7)
+ break;
+ if (ip_invalid(ip)) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (!first)
+ break;
+ ip = regs->gprs[14];
+ if (ip_invalid(ip))
+ break;
+ }
+ if (!store_ip(consume_entry, cookie, entry, perf, ip))
+ break;
+ first = false;
+ }
+ pagefault_enable();
+}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ arch_stack_walk_user_common(consume_entry, cookie, NULL, regs, false);
+}
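
arch_stack_walk_user_common() carries the extra entry/perf arguments so that
the perf user callchain code can reuse the same walker. A sketch of how that
caller is expected to look (the function body is an assumption derived from the
parameters added here, not part of this patch):

	void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
				 struct pt_regs *regs)
	{
		arch_stack_walk_user_common(NULL, NULL, entry, regs, true);
	}
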
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 4d141e2c132e..d40f0b983e74 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -17,6 +17,7 @@
#include <asm/ebcdic.h>
#include <asm/facility.h>
#include <asm/sthyi.h>
+#include <asm/asm.h>
#include "entry.h"
#define DED_WEIGHT 0xffff
@@ -300,32 +301,57 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
return (struct diag204_x_part_block *)&block->cpus[i];
}
-static void fill_diag(struct sthyi_sctns *sctns)
+static void *diag204_get_data(bool diag204_allow_busy)
{
- int i, r, pages;
- bool this_lpar;
+ unsigned long subcode;
void *diag204_buf;
+ int pages, rc;
+
+ subcode = DIAG204_SUBC_RSI;
+ subcode |= DIAG204_INFO_EXT;
+ pages = diag204(subcode, 0, NULL);
+ if (pages < 0)
+ return ERR_PTR(pages);
+ if (pages == 0)
+ return ERR_PTR(-ENODATA);
+ diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
+ PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (!diag204_buf)
+ return ERR_PTR(-ENOMEM);
+ subcode = DIAG204_SUBC_STIB7;
+ subcode |= DIAG204_INFO_EXT;
+ if (diag204_has_bif() && diag204_allow_busy)
+ subcode |= DIAG204_BIF_BIT;
+ rc = diag204(subcode, pages, diag204_buf);
+ if (rc < 0) {
+ vfree(diag204_buf);
+ return ERR_PTR(rc);
+ }
+ return diag204_buf;
+}
+
+static bool is_diag204_cached(struct sthyi_sctns *sctns)
+{
+ /*
+ * Check if validity bits are set when diag204 data
+ * is gathered.
+ */
+ if (sctns->par.infpval1)
+ return true;
+ return false;
+}
+
+static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
+{
+ int i;
+ bool this_lpar;
void *diag224_buf = NULL;
struct diag204_x_info_blk_hdr *ti_hdr;
struct diag204_x_part_block *part_block;
struct diag204_x_phys_block *phys_block;
struct lpar_cpu_inf lpar_inf = {};
- /* Errors are handled through the validity bits in the response. */
- pages = diag204((unsigned long)DIAG204_SUBC_RSI |
- (unsigned long)DIAG204_INFO_EXT, 0, NULL);
- if (pages <= 0)
- return;
-
- diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
- if (!diag204_buf)
- return;
-
- r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
- (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
- if (r < 0)
- goto out;
-
diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
if (!diag224_buf || diag224(diag224_buf))
goto out;
@@ -390,7 +416,6 @@ static void fill_diag(struct sthyi_sctns *sctns)
out:
free_page((unsigned long)diag224_buf);
- vfree(diag204_buf);
}
static int sthyi(u64 vaddr, u64 *rc)
@@ -401,30 +426,41 @@ static int sthyi(u64 vaddr, u64 *rc)
asm volatile(
".insn rre,0xB2560000,%[r1],%[r2]\n"
- "ipm %[cc]\n"
- "srl %[cc],28\n"
- : [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [r2] "+&d" (r2.pair)
: [r1] "d" (r1.pair)
- : "memory", "cc");
+ : CC_CLOBBER_LIST("memory"));
*rc = r2.odd;
- return cc;
+ return CC_TRANSFORM(cc);
}
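
The CC_IPM()/CC_OUT()/CC_TRANSFORM() helpers from <asm/asm.h> hide whether the
compiler supports flag-output operands. Without that support they are assumed
to reduce to the old explicit sequence, i.e. the sthyi() body above would
behave like:

	asm volatile(
		".insn rre,0xB2560000,%[r1],%[r2]\n"
		"ipm %[cc]\n"
		: [cc] "=&d" (cc), [r2] "+&d" (r2.pair)
		: [r1] "d" (r1.pair)
		: "cc", "memory");
	*rc = r2.odd;
	return cc >> 28;	/* CC_TRANSFORM() */
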
static int fill_dst(void *dst, u64 *rc)
{
+ void *diag204_buf;
+
struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
/*
* If the facility is on, we don't want to emulate the instruction.
* We ask the hypervisor to provide the data.
*/
- if (test_facility(74))
+ if (test_facility(74)) {
+ memset(dst, 0, PAGE_SIZE);
return sthyi((u64)dst, rc);
-
+ }
+ /*
+ * When emulating, if diag204 returns BUSY don't reset dst buffer
+ * and use cached data.
+ */
+ *rc = 0;
+ diag204_buf = diag204_get_data(is_diag204_cached(sctns));
+ if (IS_ERR(diag204_buf))
+ return PTR_ERR(diag204_buf);
+ memset(dst, 0, PAGE_SIZE);
fill_hdr(sctns);
fill_stsi(sctns);
- fill_diag(sctns);
- *rc = 0;
+ fill_diag(sctns, diag204_buf);
+ vfree(diag204_buf);
return 0;
}
@@ -443,11 +479,14 @@ static int sthyi_update_cache(u64 *rc)
{
int r;
- memset(sthyi_cache.info, 0, PAGE_SIZE);
r = fill_dst(sthyi_cache.info, rc);
- if (r)
- return r;
- sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ if (r == 0) {
+ sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ } else if (r == -EBUSY) {
+ /* mark as expired and return 0 to keep using cached data */
+ sthyi_cache.end = jiffies - 1;
+ r = 0;
+ }
return r;
}
@@ -459,9 +498,9 @@ static int sthyi_update_cache(u64 *rc)
*
* Fills the destination with system information returned by the STHYI
* instruction. The data is generated by emulation or execution of STHYI,
- * if available. The return value is the condition code that would be
- * returned, the rc parameter is the return code which is passed in
- * register R2 + 1.
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned, the rc parameter is the
+ * return code which is passed in register R2 + 1.
*/
int sthyi_fill(void *dst, u64 *rc)
{
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..4fee74553ca2 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -12,6 +12,7 @@
* platform.
*/
+#include <linux/cpufeature.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -38,33 +39,6 @@
#include "entry.h"
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
@@ -108,25 +82,35 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
-static void do_syscall(struct pt_regs *regs)
+void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
{
unsigned long nr;
+ add_random_kstack_offset();
+ enter_from_user_mode(regs);
+ regs->psw = get_lowcore()->svc_old_psw;
+ regs->int_code = get_lowcore()->svc_int_code;
+ update_timer_sys();
+ if (cpu_has_bear())
+ current->thread.last_break = regs->last_break;
+ local_irq_enable();
+ regs->orig_gpr2 = regs->gprs[2];
+ if (unlikely(per_trap))
+ set_thread_flag(TIF_PER_TRAP);
+ regs->flags = 0;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
nr = regs->int_code & 0xffff;
- if (!nr) {
+ if (likely(!nr)) {
nr = regs->gprs[1] & 0xffff;
regs->int_code &= ~0xffffUL;
regs->int_code |= nr;
}
-
regs->gprs[2] = nr;
-
if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) {
regs->psw.addr = current->restart_block.arch_data;
current->restart_block.arch_data = 1;
}
nr = syscall_enter_from_user_mode_work(regs, nr);
-
/*
* In the s390 ptrace ABI, both the syscall number and the return value
* use gpr2. However, userspace puts the syscall number either in the
@@ -134,37 +118,11 @@ static void do_syscall(struct pt_regs *regs)
* work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here
* and if set, the syscall will be skipped.
*/
-
if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
goto out;
regs->gprs[2] = -ENOSYS;
- if (likely(nr >= NR_syscalls))
- goto out;
- do {
+ if (likely(nr < NR_syscalls))
regs->gprs[2] = current->thread.sys_call_table[nr](regs);
- } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART));
out:
- syscall_exit_to_user_mode_work(regs);
-}
-
-void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
-{
- add_random_kstack_offset();
- enter_from_user_mode(regs);
- regs->psw = S390_lowcore.svc_old_psw;
- regs->int_code = S390_lowcore.svc_int_code;
- update_timer_sys();
- if (static_branch_likely(&cpu_has_bear))
- current->thread.last_break = regs->last_break;
-
- local_irq_enable();
- regs->orig_gpr2 = regs->gprs[2];
-
- if (per_trap)
- set_thread_flag(TIF_PER_TRAP);
-
- regs->flags = 0;
- set_pt_regs_flag(regs, PIF_SYSCALL);
- do_syscall(regs);
- exit_to_user_mode();
+ syscall_exit_to_user_mode(regs);
}
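
The dispatch through current->thread.sys_call_table[nr](regs) assumes a table
of pt_regs based handlers generated from syscalls/syscall.tbl; its shape is
roughly the following (typedef and array names are given as assumptions):

	typedef long (*sys_call_ptr_t)(struct pt_regs *regs);

	extern const sys_call_ptr_t sys_call_table[NR_syscalls];
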
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index fb85e797946d..c5d958a09ff4 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -4,15 +4,15 @@ gen := arch/$(ARCH)/include/generated
kapi := $(gen)/asm
uapi := $(gen)/uapi/asm
-syscall := $(srctree)/$(src)/syscall.tbl
-systbl := $(srctree)/$(src)/syscalltbl
+syscall := $(src)/syscall.tbl
+systbl := $(src)/syscalltbl
gen-y := $(kapi)/syscall_table.h
kapi-hdrs-y := $(kapi)/unistd_nr.h
uapi-hdrs-y := $(uapi)/unistd_32.h
uapi-hdrs-y += $(uapi)/unistd_64.h
-targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
+targets += $(addprefix ../../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y))
PHONY += kapi uapi
@@ -23,23 +23,26 @@ uapi: $(uapi-hdrs-y)
# Create output directory if not already present
$(shell mkdir -p $(uapi) $(kapi))
-filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $<
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$@" < $< > $@
-filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $<
+quiet_cmd_sysnr = SYSNR $@
+ cmd_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< > $@
-filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $<
+quiet_cmd_syscalls = SYSTBL $@
+ cmd_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< > $@
syshdr_abi_unistd_32 := common,32
-$(uapi)/unistd_32.h: $(syscall) FORCE
- $(call filechk,syshdr,$@)
+$(uapi)/unistd_32.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,syshdr)
syshdr_abi_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall) FORCE
- $(call filechk,syshdr,$@)
+$(uapi)/unistd_64.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,syshdr)
-$(kapi)/syscall_table.h: $(syscall) FORCE
- $(call filechk,syscalls)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,syscalls)
sysnr_abi_unistd_nr := common,32,64
-$(kapi)/unistd_nr.h: $(syscall) FORCE
- $(call filechk,sysnr)
+$(kapi)/unistd_nr.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,sysnr)
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 799147658dee..a4569b96ef06 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -100,7 +100,7 @@
106 common stat sys_newstat compat_sys_newstat
107 common lstat sys_newlstat compat_sys_newlstat
108 common fstat sys_newfstat compat_sys_newfstat
-110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+110 common lookup_dcookie - -
111 common vhangup sys_vhangup sys_vhangup
112 common idle - -
114 common wait4 sys_wait4 compat_sys_wait4
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
@@ -449,7 +449,24 @@
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
-# 447 reserved for memfd_secret
+447 common memfd_secret sys_memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
+453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack
+454 common futex_wake sys_futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue sys_futex_requeue
+457 common statmount sys_statmount sys_statmount
+458 common listmount sys_listmount sys_listmount
+459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr
+460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
+461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal sys_mseal
+463 common setxattrat sys_setxattrat sys_setxattrat
+464 common getxattrat sys_getxattrat sys_getxattrat
+465 common listxattrat sys_listxattrat sys_listxattrat
+466 common removexattrat sys_removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index b5e364358ce4..1ea84e942bd4 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -5,6 +5,7 @@
* Martin Schwidefsky <schwidefsky@de.ibm.com>,
*/
+#include <linux/cpufeature.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -15,54 +16,17 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <asm/asm-extable.h>
+#include <asm/machine.h>
#include <asm/ebcdic.h>
#include <asm/debug.h>
#include <asm/sysinfo.h>
#include <asm/cpcmd.h>
#include <asm/topology.h>
-#include <asm/fpu/api.h>
+#include <asm/fpu.h>
+#include <asm/asm.h>
int topology_max_mnest;
-static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
-{
- int r0 = (fc << 28) | sel1;
- int rc = 0;
-
- asm volatile(
- " lr 0,%[r0]\n"
- " lr 1,%[r1]\n"
- " stsi 0(%[sysinfo])\n"
- "0: jz 2f\n"
- "1: lhi %[rc],%[retval]\n"
- "2: lr %[r0],0\n"
- EX_TABLE(0b, 1b)
- : [r0] "+d" (r0), [rc] "+d" (rc)
- : [r1] "d" (sel2),
- [sysinfo] "a" (sysinfo),
- [retval] "K" (-EOPNOTSUPP)
- : "cc", "0", "1", "memory");
- *lvl = ((unsigned int) r0) >> 28;
- return rc;
-}
-
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
-{
- int lvl, rc;
-
- rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
- if (rc)
- return rc;
- return fc ? 0 : lvl;
-}
-EXPORT_SYMBOL(stsi);
-
#ifdef CONFIG_PROC_FS
static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
@@ -81,10 +45,12 @@ static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
{
+ bool has_var_cap;
int i;
if (stsi(info, 1, 1, 1))
return;
+ has_var_cap = !!info->model_var_cap[0];
EBCASC(info->manufacturer, sizeof(info->manufacturer));
EBCASC(info->type, sizeof(info->type));
EBCASC(info->model, sizeof(info->model));
@@ -93,6 +59,8 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
EBCASC(info->model_capacity, sizeof(info->model_capacity));
EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+ if (has_var_cap)
+ EBCASC(info->model_var_cap, sizeof(info->model_var_cap));
seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
seq_printf(m, "Type: %-4.4s\n", info->type);
if (info->lic)
@@ -120,12 +88,18 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
info->model_temp_cap,
info->model_temp_cap_rating);
+ if (has_var_cap && info->model_var_cap_rating)
+ seq_printf(m, "Model Var. Capacity: %-16.16s %08u\n",
+ info->model_var_cap,
+ info->model_var_cap_rating);
if (info->ncr)
seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
if (info->npr)
seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
if (info->ntr)
seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+ if (has_var_cap && info->nvr)
+ seq_printf(m, "Nominal Var. Rating: %08u\n", info->nvr);
if (info->cai) {
seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
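The stsi_1_1_1() hunks above add "Model Var. Capacity" and "Nominal Var. Rating" lines to /proc/sysinfo, but only when the STSI 1.1.1 block actually carries the variable-capacity fields. A small userspace sketch that scans for the new lines (label strings taken from the seq_printf() formats above; whether they appear depends on the machine):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/sysinfo", "r");

	if (!f) {
		perror("/proc/sysinfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* Both labels are exactly 20 characters including the colon. */
		if (strncmp(line, "Model Var. Capacity:", 20) == 0 ||
		    strncmp(line, "Nominal Var. Rating:", 20) == 0)
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}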
@@ -144,7 +118,7 @@ static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
int i;
seq_putc(m, '\n');
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
if (stsi(info, 15, 1, topology_max_mnest))
return;
@@ -387,7 +361,7 @@ static void service_level_vm_print(struct seq_file *m,
{
char *query_buffer, *str;
- query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+ query_buffer = kmalloc(1024, GFP_KERNEL);
if (!query_buffer)
return;
cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
@@ -405,7 +379,7 @@ static struct service_level service_level_vm = {
static __init int create_proc_service_level(void)
{
proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
- if (MACHINE_IS_VM)
+ if (machine_is_vm())
register_service_level(&service_level_vm);
return 0;
}
@@ -416,9 +390,9 @@ subsys_initcall(create_proc_service_level);
*/
void s390_adjust_jiffies(void)
{
+ DECLARE_KERNEL_FPU_ONSTACK16(fpu);
struct sysinfo_1_2_2 *info;
unsigned long capability;
- struct kernel_fpu fpu;
info = (void *) get_zeroed_page(GFP_KERNEL);
if (!info)
@@ -437,21 +411,14 @@ void s390_adjust_jiffies(void)
* point division ..
*/
kernel_fpu_begin(&fpu, KERNEL_FPR);
- asm volatile(
- " sfpc %3\n"
- " l %0,%1\n"
- " tmlh %0,0xff80\n"
- " jnz 0f\n"
- " cefbr %%f2,%0\n"
- " j 1f\n"
- "0: le %%f2,%1\n"
- "1: cefbr %%f0,%2\n"
- " debr %%f0,%%f2\n"
- " cgebr %0,5,%%f0\n"
- : "=&d" (capability)
- : "Q" (info->capability), "d" (10000000), "d" (0)
- : "cc"
- );
+ fpu_sfpc(0);
+ if (info->capability & 0xff800000)
+ fpu_ldgr(2, info->capability);
+ else
+ fpu_cefbr(2, info->capability);
+ fpu_cefbr(0, 10000000);
+ fpu_debr(0, 2);
+ capability = fpu_cgebr(0, 5);
kernel_fpu_end(&fpu, KERNEL_FPR);
} else
/*
@@ -495,7 +462,6 @@ static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \
.open = stsi_open_##fc##_##s1##_##s2, \
.release = stsi_release, \
.read = stsi_read, \
- .llseek = no_llseek, \
};
static int stsi_release(struct inode *inode, struct file *file)
@@ -557,7 +523,7 @@ static __init int stsi_init_debugfs(void)
sf = &stsi_file[i];
debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops);
}
- if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) {
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && cpu_has_topology()) {
char link_to[10];
sprintf(link_to, "15_1_%d", topology_mnest_limit());
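For reference, the fpu_*() sequence in the s390_adjust_jiffies() hunk above divides 10000000 by the machine capability in 32-bit binary floating point; the capability value is loaded either converted from an integer or, when its high bits are set, as an already floating-point encoded value via fpu_ldgr(). A plain C sketch of the integer case only (an assumption: it ignores the fpu_ldgr() path and the exact cgebr rounding mode):

#include <stdio.h>

/* Integer-capability case only: a smaller result means a faster machine. */
static unsigned long capability_factor(unsigned int capability)
{
	return (unsigned long)(10000000.0f / (float)capability);
}

int main(void)
{
	printf("capability 2000 -> factor %lu\n", capability_factor(2000));
	return 0;
}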
diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S
index 2c8b14cc5556..26f2981aa09e 100644
--- a/arch/s390/kernel/text_amode31.S
+++ b/arch/s390/kernel/text_amode31.S
@@ -18,8 +18,7 @@
* affects a few functions that are not performance-relevant.
*/
.macro BR_EX_AMODE31_r14
- larl %r1,0f
- ex 0,0(%r1)
+ exrl 0,0f
j .
0: br %r14
.endm
@@ -27,7 +26,7 @@
/*
* int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode)
*/
-ENTRY(_diag14_amode31)
+SYM_FUNC_START(_diag14_amode31)
lgr %r1,%r2
lgr %r2,%r3
lgr %r3,%r4
@@ -42,12 +41,12 @@ ENTRY(_diag14_amode31)
lgfr %r2,%r5
BR_EX_AMODE31_r14
EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault)
-ENDPROC(_diag14_amode31)
+SYM_FUNC_END(_diag14_amode31)
/*
* int _diag210_amode31(struct diag210 *addr)
*/
-ENTRY(_diag210_amode31)
+SYM_FUNC_START(_diag210_amode31)
lgr %r1,%r2
lhi %r2,-1
sam31
@@ -60,12 +59,25 @@ ENTRY(_diag210_amode31)
lgfr %r2,%r2
BR_EX_AMODE31_r14
EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault)
-ENDPROC(_diag210_amode31)
+SYM_FUNC_END(_diag210_amode31)
/*
+ * int _diag8c_amode31(struct diag8c *addr, struct ccw_dev_id *devno, size_t len)
+ */
+SYM_FUNC_START(_diag8c_amode31)
+ llgf %r3,0(%r3)
+ sam31
+ diag %r2,%r4,0x8c
+.Ldiag8c_ex:
+ sam64
+ lgfr %r2,%r3
+ BR_EX_AMODE31_r14
+ EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex)
+SYM_FUNC_END(_diag8c_amode31)
+/*
* int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode)
*/
-ENTRY(_diag26c_amode31)
+SYM_FUNC_START(_diag26c_amode31)
lghi %r5,-EOPNOTSUPP
sam31
diag %r2,%r4,0x26c
@@ -74,42 +86,42 @@ ENTRY(_diag26c_amode31)
lgfr %r2,%r5
BR_EX_AMODE31_r14
EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex)
-ENDPROC(_diag26c_amode31)
+SYM_FUNC_END(_diag26c_amode31)
/*
- * void _diag0c_amode31(struct hypfs_diag0c_entry *entry)
+ * void _diag0c_amode31(unsigned long rx)
*/
-ENTRY(_diag0c_amode31)
+SYM_FUNC_START(_diag0c_amode31)
sam31
diag %r2,%r2,0x0c
sam64
BR_EX_AMODE31_r14
-ENDPROC(_diag0c_amode31)
+SYM_FUNC_END(_diag0c_amode31)
/*
* void _diag308_reset_amode31(void)
*
* Calls diag 308 subcode 1 and continues execution
*/
-ENTRY(_diag308_reset_amode31)
- larl %r4,.Lctlregs # Save control registers
+SYM_FUNC_START(_diag308_reset_amode31)
+ larl %r4,ctlregs # Save control registers
stctg %c0,%c15,0(%r4)
lg %r2,0(%r4) # Disable lowcore protection
nilh %r2,0xefff
- larl %r4,.Lctlreg0
+ larl %r4,ctlreg0
stg %r2,0(%r4)
lctlg %c0,%c0,0(%r4)
- larl %r4,.Lfpctl # Floating point control register
+ larl %r4,fpctl # Floating point control register
stfpc 0(%r4)
- larl %r4,.Lprefix # Save prefix register
+ larl %r4,prefix # Save prefix register
stpx 0(%r4)
- larl %r4,.Lprefix_zero # Set prefix register to 0
+ larl %r4,prefix_zero # Set prefix register to 0
spx 0(%r4)
- larl %r4,.Lcontinue_psw # Save PSW flags
+ larl %r4,continue_psw # Save PSW flags
epsw %r2,%r3
stm %r2,%r3,0(%r4)
larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0
- larl %r3,.Lrestart_diag308_psw
+ larl %r3,restart_diag308_psw
og %r4,0(%r3) # Save PSW
lghi %r3,0
sturg %r4,%r3 # Use sturg, because of large pages
@@ -121,39 +133,26 @@ ENTRY(_diag308_reset_amode31)
lhi %r1,2 # Use mode 2 = ESAME (dump)
sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
sam64 # Switch to 64 bit addressing mode
- larl %r4,.Lctlregs # Restore control registers
+ larl %r4,ctlregs # Restore control registers
lctlg %c0,%c15,0(%r4)
- larl %r4,.Lfpctl # Restore floating point ctl register
+ larl %r4,fpctl # Restore floating point ctl register
lfpc 0(%r4)
- larl %r4,.Lprefix # Restore prefix register
+ larl %r4,prefix # Restore prefix register
spx 0(%r4)
- larl %r4,.Lcontinue_psw # Restore PSW flags
+ larl %r4,continue_psw # Restore PSW flags
larl %r2,.Lcontinue
stg %r2,8(%r4)
lpswe 0(%r4)
.Lcontinue:
BR_EX_AMODE31_r14
-ENDPROC(_diag308_reset_amode31)
+SYM_FUNC_END(_diag308_reset_amode31)
.section .amode31.data,"aw",@progbits
-.align 8
-.Lrestart_diag308_psw:
- .long 0x00080000,0x80000000
-
-.align 8
-.Lcontinue_psw:
- .quad 0,0
-
-.align 8
-.Lctlreg0:
- .quad 0
-.Lctlregs:
- .rept 16
- .quad 0
- .endr
-.Lfpctl:
- .long 0
-.Lprefix:
- .long 0
-.Lprefix_zero:
- .long 0
+ .balign 8
+SYM_DATA_LOCAL(restart_diag308_psw, .long 0x00080000,0x80000000)
+SYM_DATA_LOCAL(continue_psw, .quad 0,0)
+SYM_DATA_LOCAL(ctlreg0, .quad 0)
+SYM_DATA_LOCAL(ctlregs, .fill 16,8,0)
+SYM_DATA_LOCAL(fpctl, .long 0)
+SYM_DATA_LOCAL(prefix, .long 0)
+SYM_DATA_LOCAL(prefix_zero, .long 0)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..fed17d407a44 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -36,7 +36,6 @@
#include <linux/profile.h>
#include <linux/timex.h>
#include <linux/notifier.h>
-#include <linux/timekeeper_internal.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/kprobes.h>
@@ -55,10 +54,10 @@
#include <asm/cio.h>
#include "entry.h"
-union tod_clock tod_clock_base __section(".data");
+union tod_clock __bootdata_preserved(tod_clock_base);
EXPORT_SYMBOL_GPL(tod_clock_base);
-u64 clock_comparator_max = -1ULL;
+u64 __bootdata_preserved(clock_comparator_max);
EXPORT_SYMBOL_GPL(clock_comparator_max);
static DEFINE_PER_CPU(struct clock_event_device, comparators);
@@ -80,12 +79,10 @@ void __init time_early_init(void)
{
struct ptff_qto qto;
struct ptff_qui qui;
- int cs;
/* Initialize TOD steering parameters */
tod_steering_end = tod_clock_base.tod;
- for (cs = 0; cs < CS_BASES; cs++)
- vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+ vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end;
if (!test_facility(28))
return;
@@ -102,6 +99,11 @@ void __init time_early_init(void)
((long) qui.old_leap * 4096000000L);
}
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+ return tod_to_ns(__get_tod_clock_monotonic());
+}
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -126,7 +128,7 @@ void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = clock_comparator_max;
+ get_lowcore()->clock_comparator = clock_comparator_max;
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -134,8 +136,8 @@ void clock_comparator_work(void)
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- S390_lowcore.clock_comparator = get_tod_clock() + delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return 0;
}
@@ -148,8 +150,8 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = clock_comparator_max;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
@@ -168,10 +170,10 @@ void init_cpu_timer(void)
clockevents_register_device(cd);
/* Enable clock comparator timer interrupt. */
- __ctl_set_bit(0,11);
+ local_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SUBMASK_BIT);
/* Always allow the timing alert external interrupt. */
- __ctl_set_bit(0, 4);
+ local_ctl_set_bit(0, CR0_ETR_SUBMASK_BIT);
}
static void clock_comparator_interrupt(struct ext_code ext_code,
@@ -179,8 +181,8 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == clock_comparator_max)
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator == clock_comparator_max)
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
static void stp_timing_alert(struct stp_irq_parm *);
@@ -246,10 +248,11 @@ static struct clocksource clocksource_tod = {
.rating = 400,
.read = read_tod_clock,
.mask = CLOCKSOURCE_MASK(64),
- .mult = 1000,
- .shift = 12,
+ .mult = 4096000,
+ .shift = 24,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
+ .id = CSID_S390_TOD,
};
struct clocksource * __init clocksource_default_clock(void)
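The clocksource update above replaces mult=1000/shift=12 with mult=4096000/shift=24. Both pairs encode the same conversion, ns = cycles * 1000 / 4096 (one TOD unit is 1/4096 microsecond); the larger pair merely keeps twelve more fractional bits for fine-grained steering. A small self-contained check of that equivalence (ordinary userspace C, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	uint64_t tod = 4096ULL * 1000000;	/* one second worth of TOD units */

	printf("old pair: %llu ns\n", (unsigned long long)cyc2ns(tod, 1000, 12));
	printf("new pair: %llu ns\n", (unsigned long long)cyc2ns(tod, 4096000, 24));
	assert(cyc2ns(tod, 1000, 12) == cyc2ns(tod, 4096000, 24));
	return 0;
}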
@@ -368,7 +371,6 @@ static void clock_sync_global(long delta)
{
unsigned long now, adj;
struct ptff_qto qto;
- int cs;
/* Fixup the monotonic sched clock. */
tod_clock_base.eitod += delta;
@@ -384,10 +386,8 @@ static void clock_sync_global(long delta)
panic("TOD clock sync offset %li is too large to drift\n",
tod_steering_delta);
tod_steering_end = now + (abs(tod_steering_delta) << 15);
- for (cs = 0; cs < CS_BASES; cs++) {
- vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
- vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta;
- }
+ vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end;
+ vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta;
/* Update LPAR offset. */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
@@ -403,12 +403,12 @@ static void clock_sync_global(long delta)
static void clock_sync_local(long delta)
{
/* Add the delta to the clock comparator. */
- if (S390_lowcore.clock_comparator != clock_comparator_max) {
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator != clock_comparator_max) {
+ get_lowcore()->clock_comparator += delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
/* Adjust the last_update_clock time-stamp. */
- S390_lowcore.last_update_clock += delta;
+ get_lowcore()->last_update_clock += delta;
}
/* Single threaded workqueue used for stp sync events */
@@ -463,6 +463,12 @@ static void __init stp_reset(void)
}
}
+bool stp_enabled(void)
+{
+ return test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online;
+}
+EXPORT_SYMBOL(stp_enabled);
+
static void stp_timeout(struct timer_list *unused)
{
queue_work(time_sync_wq, &stp_work);
@@ -651,12 +657,12 @@ static void stp_check_leap(void)
if (ret < 0)
pr_err("failed to set leap second flags\n");
/* arm Timer to clear leap second flags */
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+ mod_timer(&stp_timer, jiffies + secs_to_jiffies(14400));
} else {
/* The day the leap second is scheduled for hasn't been reached. Retry
* in one hour.
*/
- mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+ mod_timer(&stp_timer, jiffies + secs_to_jiffies(3600));
}
}
@@ -674,7 +680,7 @@ static void stp_work_fn(struct work_struct *work)
if (!stp_online) {
chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
- del_timer_sync(&stp_timer);
+ timer_delete_sync(&stp_timer);
goto out_unlock;
}
@@ -697,7 +703,7 @@ static void stp_work_fn(struct work_struct *work)
if (!check_sync_clock())
/*
- * There is a usable clock but the synchonization failed.
+ * There is a usable clock but the synchronization failed.
* Retry after a second.
*/
mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
@@ -711,7 +717,7 @@ out_unlock:
/*
* STP subsys sysfs interface functions
*/
-static struct bus_type stp_subsys = {
+static const struct bus_type stp_subsys = {
.name = "stp",
.dev_name = "stp",
};
@@ -724,8 +730,8 @@ static ssize_t ctn_id_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%016lx\n",
- *(unsigned long *) stp_info.ctnid);
+ ret = sysfs_emit(buf, "%016lx\n",
+ *(unsigned long *)stp_info.ctnid);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -740,7 +746,7 @@ static ssize_t ctn_type_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.ctn);
+ ret = sysfs_emit(buf, "%i\n", stp_info.ctn);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -755,7 +761,7 @@ static ssize_t dst_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x2000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.dsto);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -770,7 +776,7 @@ static ssize_t leap_seconds_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x8000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.leaps);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -796,11 +802,11 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev,
return ret;
if (!stzi.lsoib.p)
- return sprintf(buf, "0,0\n");
+ return sysfs_emit(buf, "0,0\n");
- return sprintf(buf, "%lu,%d\n",
- tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
- stzi.lsoib.nlso - stzi.lsoib.also);
+ return sysfs_emit(buf, "%lu,%d\n",
+ tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+ stzi.lsoib.nlso - stzi.lsoib.also);
}
static DEVICE_ATTR_RO(leap_seconds_scheduled);
@@ -813,7 +819,7 @@ static ssize_t stratum_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.stratum);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -828,7 +834,7 @@ static ssize_t time_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x0800))
- ret = sprintf(buf, "%i\n", (int) stp_info.tto);
+ ret = sysfs_emit(buf, "%i\n", (int)stp_info.tto);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -843,7 +849,7 @@ static ssize_t time_zone_offset_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x4000))
- ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+ ret = sysfs_emit(buf, "%i\n", (int)(s16)stp_info.tzo);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -858,7 +864,7 @@ static ssize_t timing_mode_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.tmd);
+ ret = sysfs_emit(buf, "%i\n", stp_info.tmd);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -873,7 +879,7 @@ static ssize_t timing_state_show(struct device *dev,
mutex_lock(&stp_mutex);
if (stpinfo_valid())
- ret = sprintf(buf, "%i\n", stp_info.tst);
+ ret = sysfs_emit(buf, "%i\n", stp_info.tst);
mutex_unlock(&stp_mutex);
return ret;
}
@@ -884,7 +890,7 @@ static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%i\n", stp_online);
+ return sysfs_emit(buf, "%i\n", stp_online);
}
static ssize_t online_store(struct device *dev,
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index c6eecd4a5302..3df048e190b1 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -6,6 +6,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
@@ -24,7 +25,9 @@
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
+#include <asm/hiperdispatch.h>
#include <asm/sysinfo.h>
+#include <asm/asm.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -47,6 +50,7 @@ static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
+static int cpu_management;
static DECLARE_WORK(topology_work, topology_work_fn);
@@ -95,7 +99,7 @@ out:
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
static cpumask_t mask;
- int i;
+ unsigned int max_cpu;
cpumask_clear(&mask);
if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
@@ -104,9 +108,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
if (topology_mode != TOPOLOGY_MODE_HW)
goto out;
cpu -= cpu % (smp_cpu_mtid + 1);
- for (i = 0; i <= smp_cpu_mtid; i++) {
- if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
- cpumask_set_cpu(cpu + i, &mask);
+ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+ for (; cpu <= max_cpu; cpu++) {
+ if (cpumask_test_cpu(cpu, &cpu_setup_mask))
+ cpumask_set_cpu(cpu, &mask);
}
out:
cpumask_copy(dst, &mask);
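The cpu_thread_map() and add_cpus_to_mask() changes above bound the per-core thread loop with min(cpu + smp_cpu_mtid, nr_cpu_ids - 1), so the iteration can never step past the last possible CPU id. A toy illustration of that clamp (variable names mirror the kernel's, the values are invented):

#include <stdio.h>

int main(void)
{
	unsigned int nr_cpu_ids = 6;	/* assumed: only CPUs 0..5 exist */
	unsigned int smp_cpu_mtid = 3;	/* assumed: 4 hardware threads per core */
	unsigned int cpu = 5;		/* some thread in the last core */
	unsigned int max_cpu;

	cpu -= cpu % (smp_cpu_mtid + 1);	/* first thread of the core: 4 */
	max_cpu = cpu + smp_cpu_mtid;		/* 7, which does not exist ... */
	if (max_cpu > nr_cpu_ids - 1)		/* ... so clamp it like min() does */
		max_cpu = nr_cpu_ids - 1;
	for (; cpu <= max_cpu; cpu++)
		printf("sibling thread %u\n", cpu);
	return 0;
}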
@@ -123,25 +128,27 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
unsigned int core;
for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
- unsigned int rcore;
- int lcpu, i;
+ unsigned int max_cpu, rcore;
+ int cpu;
rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
- lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
- if (lcpu < 0)
+ cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+ if (cpu < 0)
continue;
- for (i = 0; i <= smp_cpu_mtid; i++) {
- topo = &cpu_topology[lcpu + i];
+ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+ for (; cpu <= max_cpu; cpu++) {
+ topo = &cpu_topology[cpu];
topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->socket_id = socket->id;
topo->core_id = rcore;
- topo->thread_id = lcpu + i;
+ topo->thread_id = cpu;
topo->dedicated = tl_core->d;
- cpumask_set_cpu(lcpu + i, &drawer->mask);
- cpumask_set_cpu(lcpu + i, &book->mask);
- cpumask_set_cpu(lcpu + i, &socket->mask);
- smp_cpu_set_polarization(lcpu + i, tl_core->pp);
+ cpumask_set_cpu(cpu, &drawer->mask);
+ cpumask_set_cpu(cpu, &book->mask);
+ cpumask_set_cpu(cpu, &socket->mask);
+ smp_cpu_set_polarization(cpu, tl_core->pp);
+ smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
}
}
}
@@ -219,22 +226,22 @@ static void topology_update_polarization_simple(void)
static int ptf(unsigned long fc)
{
- int rc;
+ int cc;
asm volatile(
- " .insn rre,0xb9a20000,%1,%1\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc)
- : "d" (fc) : "cc");
- return rc;
+ " .insn rre,0xb9a20000,%[fc],%[fc]\n"
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
+ : [fc] "d" (fc)
+ : CC_CLOBBER);
+ return CC_TRANSFORM(cc);
}
int topology_set_cpu_management(int fc)
{
int cpu, rc;
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
@@ -268,6 +275,7 @@ void update_cpu_masks(void)
topo->drawer_id = id;
}
}
+ hd_reset_state();
for_each_online_cpu(cpu) {
topo = &cpu_topology[cpu];
pkg_first = cpumask_first(&topo->core_mask);
@@ -276,8 +284,10 @@ void update_cpu_masks(void)
for_each_cpu(sibling, &topo->core_mask) {
topo_sibling = &cpu_topology[sibling];
smt_first = cpumask_first(&topo_sibling->thread_mask);
- if (sibling == smt_first)
+ if (sibling == smt_first) {
topo_package->booted_cores++;
+ hd_add_core(sibling);
+ }
}
} else {
topo->booted_cores = topo_package->booted_cores;
@@ -301,33 +311,33 @@ static void __arch_update_dedicated_flag(void *arg)
static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
- int rc = 0;
+ int rc, hd_status;
+ hd_status = 0;
+ rc = 0;
mutex_lock(&smp_cpu_state_mutex);
- if (MACHINE_HAS_TOPOLOGY) {
+ if (cpu_has_topology()) {
rc = 1;
store_topology(info);
tl_to_masks(info);
}
update_cpu_masks();
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
topology_update_polarization_simple();
+ if (cpu_management == 1)
+ hd_status = hd_enable_hiperdispatch();
mutex_unlock(&smp_cpu_state_mutex);
+ if (hd_status == 0)
+ hd_disable_hiperdispatch();
return rc;
}
int arch_update_cpu_topology(void)
{
- struct device *dev;
- int cpu, rc;
+ int rc;
rc = __arch_update_cpu_topology();
on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
- for_each_online_cpu(cpu) {
- dev = get_cpu_device(cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- }
return rc;
}
@@ -362,12 +372,12 @@ static void set_topology_timer(void)
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
+ mod_timer(&topology_timer, jiffies + secs_to_jiffies(60));
}
void topology_expect_change(void)
{
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
@@ -378,7 +388,24 @@ void topology_expect_change(void)
set_topology_timer();
}
-static int cpu_management;
+static int set_polarization(int polarization)
+{
+ int rc = 0;
+
+ cpus_read_lock();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == polarization)
+ goto out;
+ rc = topology_set_cpu_management(polarization);
+ if (rc)
+ goto out;
+ cpu_management = polarization;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ cpus_read_unlock();
+ return rc;
+}
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
@@ -387,7 +414,7 @@ static ssize_t dispatching_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", cpu_management);
+ count = sysfs_emit(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -404,19 +431,7 @@ static ssize_t dispatching_store(struct device *dev,
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
- rc = 0;
- cpus_read_lock();
- mutex_lock(&smp_cpu_state_mutex);
- if (cpu_management == val)
- goto out;
- rc = topology_set_cpu_management(val);
- if (rc)
- goto out;
- cpu_management = val;
- topology_expect_change();
-out:
- mutex_unlock(&smp_cpu_state_mutex);
- cpus_read_unlock();
+ rc = set_polarization(val);
return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);
@@ -430,19 +445,19 @@ static ssize_t cpu_polarization_show(struct device *dev,
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
- count = sprintf(buf, "horizontal\n");
+ count = sysfs_emit(buf, "horizontal\n");
break;
case POLARIZATION_VL:
- count = sprintf(buf, "vertical:low\n");
+ count = sysfs_emit(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
- count = sprintf(buf, "vertical:medium\n");
+ count = sysfs_emit(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
- count = sprintf(buf, "vertical:high\n");
+ count = sysfs_emit(buf, "vertical:high\n");
break;
default:
- count = sprintf(buf, "unknown\n");
+ count = sysfs_emit(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
@@ -466,7 +481,7 @@ static ssize_t cpu_dedicated_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
+ count = sysfs_emit(buf, "%d\n", topology_cpu_dedicated(cpu));
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -486,7 +501,7 @@ int topology_cpu_init(struct cpu *cpu)
int rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
- if (rc || !MACHINE_HAS_TOPOLOGY)
+ if (rc || !cpu_has_topology())
return rc;
rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
if (rc)
@@ -520,7 +535,7 @@ static struct sched_domain_topology_level s390_topology[] = {
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { cpu_cpu_mask, SD_INIT_NAME(PKG) },
{ NULL, },
};
@@ -534,33 +549,38 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
- mask->next = memblock_alloc(sizeof(*mask->next), 8);
- if (!mask->next)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*mask->next), 8);
+ mask->next = memblock_alloc_or_panic(sizeof(*mask->next), 8);
mask = mask->next;
}
}
+static int __init detect_polarization(union topology_entry *tle)
+{
+ struct topology_core *tl_core;
+
+ while (tle->nl)
+ tle = next_tle(tle);
+ tl_core = (struct topology_core *)tle;
+ return tl_core->pp != POLARIZATION_HRZ;
+}
+
void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
set_sched_topology(s390_topology);
if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
- if (MACHINE_HAS_TOPOLOGY)
+ if (cpu_has_topology())
topology_mode = TOPOLOGY_MODE_HW;
else
topology_mode = TOPOLOGY_MODE_SINGLE;
}
- if (!MACHINE_HAS_TOPOLOGY)
+ if (!cpu_has_topology())
goto out;
- tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- if (!tl_info)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE);
+ tl_info = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
info = tl_info;
store_topology(info);
+ cpu_management = detect_polarization(info->tle);
pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
info->mag[0], info->mag[1], info->mag[2], info->mag[3],
info->mag[4], info->mag[5], info->mnest);
@@ -577,7 +597,7 @@ static inline int topology_get_mode(int enabled)
{
if (!enabled)
return TOPOLOGY_MODE_SINGLE;
- return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+ return cpu_has_topology() ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}
static inline int topology_is_enabled(void)
@@ -598,7 +618,7 @@ static int __init topology_setup(char *str)
}
early_param("topology", topology_setup);
-static int topology_ctl_handler(struct ctl_table *ctl, int write,
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
@@ -628,33 +648,58 @@ static int topology_ctl_handler(struct ctl_table *ctl, int write,
return rc;
}
-static struct ctl_table topology_ctl_table[] = {
+static int polarization_ctl_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int polarization;
+ int rc;
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &polarization,
+ .maxlen = sizeof(int),
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+
+ polarization = cpu_management;
+ rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+ return set_polarization(polarization);
+}
+
+static const struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
- { },
-};
-
-static struct ctl_table topology_dir_table[] = {
{
- .procname = "s390",
- .maxlen = 0,
- .mode = 0555,
- .child = topology_ctl_table,
+ .procname = "polarization",
+ .mode = 0644,
+ .proc_handler = polarization_ctl_handler,
},
- { },
};
static int __init topology_init(void)
{
+ struct device *dev_root;
+ int rc = 0;
+
timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
- if (MACHINE_HAS_TOPOLOGY)
+ if (cpu_has_topology())
set_topology_timer();
else
topology_update_polarization_simple();
- register_sysctl_table(topology_dir_table);
- return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
+ if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY_VERTICAL))
+ set_polarization(1);
+ register_sysctl("s390", topology_ctl_table);
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ rc = device_create_file(dev_root, &dev_attr_dispatching);
+ put_device(dev_root);
+ }
+ return rc;
}
device_initcall(topology_init);
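In addition to the existing /sys/devices/system/cpu/dispatching attribute, the ctl_table above makes the polarization setting reachable as /proc/sys/s390/polarization, constrained to 0 (horizontal) or 1 (vertical) by SYSCTL_ZERO/SYSCTL_ONE. A minimal sketch that requests vertical polarization by writing to that file (the path is inferred from register_sysctl("s390", ...) and the procname; root privileges are assumed):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/s390/polarization", "w");

	if (!f) {
		perror("/proc/sys/s390/polarization");
		return 1;
	}
	fputs("1\n", f);	/* 1 = vertical, 0 = horizontal */
	return fclose(f) ? 1 : 0;
}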
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1d2aa448d103..19687dab32f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -3,18 +3,13 @@
* S390 version
* Copyright IBM Corp. 1999, 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
- * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
*
* Derived from "arch/i386/kernel/traps.c"
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-/*
- * 'Traps.c' handles hardware traps and faults after we have saved some
- * state in 'asm.s'.
- */
-#include "asm/irqflags.h"
-#include "asm/ptrace.h"
+#include <linux/cpufeature.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/randomize_kstack.h>
@@ -27,9 +22,13 @@
#include <linux/uaccess.h>
#include <linux/cpu.h>
#include <linux/entry-common.h>
+#include <linux/kmsan.h>
#include <asm/asm-extable.h>
-#include <asm/fpu/api.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
#include <asm/vtime.h>
+#include <asm/fpu.h>
+#include <asm/fault.h>
#include "entry.h"
static inline void __user *get_trap_ip(struct pt_regs *regs)
@@ -40,29 +39,30 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
address = current->thread.trap_tdb.data[3];
else
address = regs->psw.addr;
- return (void __user *) (address - (regs->int_code >> 16));
+ return (void __user *)(address - (regs->int_code >> 16));
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return 1;
}
+#endif
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
if (user_mode(regs)) {
force_sig_fault(si_signo, si_code, get_trap_ip(regs));
report_user_fault(regs, si_signo, 0);
- } else {
+ } else {
if (!fixup_exception(regs))
die(regs, str);
- }
+ }
}
static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
- if (notify_die(DIE_TRAP, str, regs, 0,
- regs->int_code, si_signo) == NOTIFY_STOP)
+ if (notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo) == NOTIFY_STOP)
return;
do_report_trap(regs, si_signo, si_code, str);
}
@@ -74,8 +74,7 @@ void do_per_trap(struct pt_regs *regs)
return;
if (!current->ptrace)
return;
- force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __force __user *) current->thread.per_event.address);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __force __user *)current->thread.per_event.address);
}
NOKPROBE_SYMBOL(do_per_trap);
@@ -94,36 +93,25 @@ static void name(struct pt_regs *regs) \
do_trap(regs, signr, sicode, str); \
}
-DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
- "addressing exception")
-DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
- "execute exception")
-DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
- "fixpoint divide exception")
-DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
- "fixpoint overflow exception")
-DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
- "HFP overflow exception")
-DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
- "HFP underflow exception")
-DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
- "HFP significance exception")
-DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
- "HFP divide exception")
-DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
- "HFP square root exception")
-DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
- "operand exception")
-DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
- "privileged operation")
-DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
- "special operation exception")
-DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
- "transaction constraint exception")
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, "addressing exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, "fixpoint divide exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, "execute exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, "HFP divide exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, "HFP overflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, "HFP significance exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, "HFP square root exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, "HFP underflow exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, "operand exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, "fixpoint overflow exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, "special operation exception")
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception");
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception")
static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
{
int si_code = 0;
+
/* FPC[2] is Data Exception Code */
if ((fpc & 0x00000300) == 0) {
/* bits 6 and 7 of DXC are 0 iff IEEE exception */
@@ -149,36 +137,35 @@ static void translation_specification_exception(struct pt_regs *regs)
static void illegal_op(struct pt_regs *regs)
{
- __u8 opcode[6];
- __u16 __user *location;
int is_uprobe_insn = 0;
+ u16 __user *location;
int signal = 0;
+ u16 opcode;
location = get_trap_ip(regs);
-
if (user_mode(regs)) {
- if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+ if (get_user(opcode, location))
return;
- if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+ if (opcode == S390_BREAKPOINT_U16) {
if (current->ptrace)
force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
else
signal = SIGILL;
#ifdef CONFIG_UPROBES
- } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+ } else if (opcode == UPROBE_SWBP_INSN) {
is_uprobe_insn = 1;
#endif
- } else
+ } else {
signal = SIGILL;
+ }
}
/*
- * We got either an illegal op in kernel mode, or user space trapped
+ * This is either an illegal op in kernel mode, or user space trapped
* on a uprobes illegal instruction. See if kprobes or uprobes picks
* it up. If not, SIGILL.
*/
if (is_uprobe_insn || !user_mode(regs)) {
- if (notify_die(DIE_BPT, "bpt", regs, 0,
- 3, SIGTRAP) != NOTIFY_STOP)
+ if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP)
signal = SIGILL;
}
if (signal)
@@ -186,21 +173,13 @@ static void illegal_op(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(illegal_op);
-DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
- "specification exception");
-
static void vector_exception(struct pt_regs *regs)
{
int si_code, vic;
- if (!MACHINE_HAS_VX) {
- do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
- return;
- }
-
/* get vector interrupt code from fpc */
- save_fpu_regs();
- vic = (current->thread.fpu.fpc & 0xf00) >> 8;
+ save_user_fpu_regs();
+ vic = (current->thread.ufpu.fpc & 0xf00) >> 8;
switch (vic) {
case 1: /* invalid vector operation */
si_code = FPE_FLTINV;
@@ -225,9 +204,9 @@ static void vector_exception(struct pt_regs *regs)
static void data_exception(struct pt_regs *regs)
{
- save_fpu_regs();
- if (current->thread.fpu.fpc & FPC_DXC_MASK)
- do_fp_trap(regs, current->thread.fpu.fpc);
+ save_user_fpu_regs();
+ if (current->thread.ufpu.fpc & FPC_DXC_MASK)
+ do_fp_trap(regs, current->thread.ufpu.fpc);
else
do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
}
@@ -245,7 +224,6 @@ static void monitor_event_exception(struct pt_regs *regs)
{
if (user_mode(regs))
return;
-
switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
case BUG_TRAP_TYPE_NONE:
fixup_exception(regs);
@@ -258,15 +236,20 @@ static void monitor_event_exception(struct pt_regs *regs)
}
}
-void kernel_stack_overflow(struct pt_regs *regs)
+void kernel_stack_invalid(struct pt_regs *regs)
{
+ /*
+ * Normally regs are unpoisoned by the generic entry code, but
+ * kernel_stack_invalid() is a rare case that is called bypassing it.
+ */
+ kmsan_unpoison_entry_regs(regs);
bust_spinlocks(1);
- printk("Kernel stack overflow.\n");
+ pr_emerg("Kernel stack pointer invalid\n");
show_regs(regs);
bust_spinlocks(0);
- panic("Corrupt kernel stack, can't continue.");
+ panic("Invalid kernel stack pointer, cannot continue");
}
-NOKPROBE_SYMBOL(kernel_stack_overflow);
+NOKPROBE_SYMBOL(kernel_stack_invalid);
static void __init test_monitor_call(void)
{
@@ -274,18 +257,30 @@ static void __init test_monitor_call(void)
if (!IS_ENABLED(CONFIG_BUG))
return;
- asm volatile(
+ asm_inline volatile(
" mc 0,0\n"
- "0: xgr %0,%0\n"
+ "0: lhi %[val],0\n"
"1:\n"
- EX_TABLE(0b,1b)
- : "+d" (val));
+ EX_TABLE(0b, 1b)
+ : [val] "+d" (val));
if (!val)
panic("Monitor call doesn't work!\n");
}
void __init trap_init(void)
{
+ struct lowcore *lc = get_lowcore();
+ unsigned long flags;
+ struct ctlreg cr0;
+
+ local_irq_save(flags);
+ cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
+ psw_bits(lc->external_new_psw).mcheck = 1;
+ psw_bits(lc->program_new_psw).mcheck = 1;
+ psw_bits(lc->svc_new_psw).mcheck = 1;
+ psw_bits(lc->io_new_psw).mcheck = 1;
+ local_ctl_load(0, &cr0);
+ local_irq_restore(flags);
local_mcck_enable();
test_monitor_call();
}
@@ -294,36 +289,47 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
void noinstr __do_pgm_check(struct pt_regs *regs)
{
- unsigned int trapnr;
+ struct lowcore *lc = get_lowcore();
irqentry_state_t state;
+ unsigned int trapnr;
+ union teid teid;
- regs->int_code = S390_lowcore.pgm_int_code;
- regs->int_parm_long = S390_lowcore.trans_exc_code;
-
+ teid.val = lc->trans_exc_code;
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = teid.val;
+ /*
+ * In case of a guest fault, short-circuit the fault handler and return.
+ * This way the sie64a() function will return 0; fault address and
+ * other relevant bits are saved in current->thread.gmap_teid, and
+ * the fault number in current->thread.gmap_int_code. KVM will be
+ * able to use this information to handle the fault.
+ */
+ if (test_pt_regs_flag(regs, PIF_GUEST_FAULT)) {
+ current->thread.gmap_teid.val = regs->int_parm_long;
+ current->thread.gmap_int_code = regs->int_code & 0xffff;
+ return;
+ }
state = irqentry_enter(regs);
-
if (user_mode(regs)) {
update_timer_sys();
- if (!static_branch_likely(&cpu_has_bear)) {
+ if (!cpu_has_bear()) {
if (regs->last_break < 4096)
regs->last_break = 1;
}
current->thread.last_break = regs->last_break;
}
-
- if (S390_lowcore.pgm_code & 0x0200) {
+ if (lc->pgm_code & 0x0200) {
/* transaction abort */
- current->thread.trap_tdb = S390_lowcore.pgm_tdb;
+ current->thread.trap_tdb = lc->pgm_tdb;
}
-
- if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+ if (lc->pgm_code & PGM_INT_CODE_PER) {
if (user_mode(regs)) {
struct per_event *ev = &current->thread.per_event;
set_thread_flag(TIF_PER_TRAP);
- ev->address = S390_lowcore.per_address;
- ev->cause = S390_lowcore.per_code_combined;
- ev->paid = S390_lowcore.per_access_id;
+ ev->address = lc->per_address;
+ ev->cause = lc->per_code_combined;
+ ev->paid = lc->per_access_id;
} else {
/* PER event in kernel is kprobes */
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
@@ -331,11 +337,9 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
goto out;
}
}
-
if (!irqs_disabled_flags(regs->psw.mask))
trace_hardirqs_on();
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
-
trapnr = regs->int_code & PGM_INT_CODE_MASK;
if (trapnr)
pgm_check_table[trapnr](regs);
@@ -387,8 +391,8 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
[0x3b] = do_dat_exception,
[0x3c] = default_trap_handler,
[0x3d] = do_secure_storage_access,
- [0x3e] = do_non_secure_storage_access,
- [0x3f] = do_secure_storage_violation,
+ [0x3e] = default_trap_handler,
+ [0x3f] = default_trap_handler,
[0x40] = monitor_event_exception,
[0x41 ... 0x7f] = default_trap_handler,
};
@@ -399,5 +403,3 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = {
__stringify(default_trap_handler))
COND_TRAP(do_secure_storage_access);
-COND_TRAP(do_non_secure_storage_access);
-COND_TRAP(do_secure_storage_violation);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 0ece156fdd7c..cd44be2b6ce8 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -49,6 +49,8 @@ static inline bool is_final_pt_regs(struct unwind_state *state,
READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
}
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
@@ -118,6 +120,8 @@ out_stop:
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long first_frame)
{
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
index b88345ef8bd9..5b0633ea8d93 100644
--- a/arch/s390/kernel/uprobes.c
+++ b/arch/s390/kernel/uprobes.c
@@ -12,7 +12,6 @@
#include <linux/kdebug.h>
#include <linux/sched/task_stack.h>
-#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/kprobes.h>
#include <asm/dis.h>
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index a5425075dd25..b99478e84da4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -2,7 +2,7 @@
/*
* Common Ultravisor functions and initialization
*
- * Copyright IBM Corp. 2019, 2020
+ * Copyright IBM Corp. 2019, 2024
*/
#define KMSG_COMPONENT "prot_virt"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
@@ -14,21 +14,29 @@
#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
+#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
-#endif
+EXPORT_SYMBOL(prot_virt_guest);
+/*
+ * uv_info contains both host and guest information but it's currently only
+ * expected to be used within modules if it's the KVM module or for
+ * any PV guest module.
+ *
+ * The kernel itself will write these values once in uv_query_info()
+ * and then make some of them readable via a sysfs interface.
+ */
struct uv_info __bootdata_preserved(uv_info);
+EXPORT_SYMBOL(uv_info);
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
-EXPORT_SYMBOL(uv_info);
static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len)
{
@@ -80,7 +88,7 @@ fail:
* Requests the Ultravisor to pin the page in the shared state. This will
* cause an intercept when the guest attempts to unshare the pinned page.
*/
-static int uv_pin_shared(unsigned long paddr)
+int uv_pin_shared(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@@ -92,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_pin_shared);
/*
* Requests the Ultravisor to destroy a guest page and make it
@@ -100,7 +109,7 @@ static int uv_pin_shared(unsigned long paddr)
*
* @paddr: Absolute host address of page to be destroyed
*/
-static int uv_destroy_page(unsigned long paddr)
+static int uv_destroy(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_DESTR_SEC_STOR,
@@ -121,20 +130,33 @@ static int uv_destroy_page(unsigned long paddr)
}
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio
*/
-int uv_destroy_owned_page(unsigned long paddr)
+int uv_destroy_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_destroy_page(paddr);
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_destroy(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags);
+ folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
/*
* Requests the Ultravisor to encrypt a guest page and make it
@@ -154,65 +176,119 @@ int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio.
*/
-int uv_convert_owned_from_secure(unsigned long paddr)
+int uv_convert_from_secure_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_convert_from_secure(paddr);
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags);
+ folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
+
+/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * Also, no export is needed when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
/*
- * Calculate the expected ref_count for a page that would otherwise have no
+ * Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
* the kernel, but with some slight modifications. We know that a secure
- * page can not be a huge page for example.
+ * folio can not be a large folio, for example.
*/
-static int expected_page_refs(struct page *page)
+static int expected_folio_refs(struct folio *folio)
{
int res;
- res = page_mapcount(page);
- if (PageSwapCache(page)) {
+ res = folio_mapcount(folio);
+ if (folio_test_swapcache(folio)) {
res++;
- } else if (page_mapping(page)) {
+ } else if (folio_mapping(folio)) {
res++;
- if (page_has_private(page))
+ if (folio->private)
res++;
}
return res;
}
-static int make_secure_pte(pte_t *ptep, unsigned long addr,
- struct page *exp_page, struct uv_cb_header *uvcb)
+/**
+ * __make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio()), and the folio must be
+ * locked.
+ */
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
- pte_t entry = READ_ONCE(*ptep);
- struct page *page;
int expected, cc = 0;
- if (!pte_present(entry))
- return -ENXIO;
- if (pte_val(entry) & _PAGE_INVALID)
- return -ENXIO;
-
- page = pte_page(entry);
- if (page != exp_page)
- return -ENXIO;
- if (PageWriteback(page))
- return -EAGAIN;
- expected = expected_page_refs(page);
- if (!page_ref_freeze(page, expected))
+ if (folio_test_writeback(folio))
return -EBUSY;
- set_bit(PG_arch_1, &page->flags);
+ expected = expected_folio_refs(folio) + 1;
+ if (!folio_ref_freeze(folio, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &folio->flags);
/*
* If the UVC does not succeed or fail immediately, we don't want to
* loop for long, or we might get stall notifications.
@@ -222,9 +298,9 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
* -EAGAIN and we let the callers deal with it.
*/
cc = __uv_call(0, (u64)uvcb);
- page_ref_unfreeze(page, expected);
+ folio_ref_unfreeze(folio, expected);
/*
- * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+ * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
* If busy or partially completed, return -EAGAIN.
*/
if (cc == UVC_CC_OK)
@@ -234,164 +310,255 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
{
- struct vm_area_struct *vma;
- bool local_drain = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct page *page;
- pte_t *ptep;
int rc;
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = __make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ return rc;
+}
+
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ * split the folio if it is large.
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ * Return: 0 if the operation was successful;
+ * -EAGAIN if splitting the large folio was not successful,
+ * but another attempt can be made;
+ * -EINVAL in case of other folio splitting errors. See split_folio().
+ */
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
+{
+ int rc, tried_splits;
+
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
+
+ if (!folio_test_large(folio))
+ return 0;
+
+ for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+ struct address_space *mapping;
+ loff_t lstart, lend;
+ struct inode *inode;
+
+ folio_lock(folio);
+ rc = split_folio(folio);
+ if (rc != -EBUSY) {
+ folio_unlock(folio);
+ return rc;
+ }
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- page = follow_page(vma, uaddr, FOLL_WRITE);
- if (IS_ERR_OR_NULL(page))
- goto out;
-
- lock_page(page);
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- rc = make_secure_pte(ptep, uaddr, page, uvcb);
- pte_unmap_unlock(ptep, ptelock);
- unlock_page(page);
-out:
- mmap_read_unlock(gmap->mm);
-
- if (rc == -EAGAIN) {
- /*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
- */
- wait_on_page_writeback(page);
- } else if (rc == -EBUSY) {
/*
- * If we have tried a local drain and the page refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
+ * Splitting with -EBUSY can fail for various reasons, but we
+ * have to handle one case explicitly for now: some mappings
+ * don't allow for splitting dirty folios; writeback will
+ * mark them clean again, including marking all page table
+ * entries mapping the folio read-only, to catch future write
+ * attempts.
+ *
+ * While the system should be writing back dirty folios in the
+ * background, we obtained this folio by looking up a writable
+ * page table entry. On these problematic mappings, writable
+ * page table entries imply dirty folios, preventing the
+ * split in the first place.
+ *
+ * To prevent a livelock when triggering writeback manually and
+ * letting the caller look up the folio again in the page
+ * table (turning it dirty), immediately try to split again.
+ *
+ * This is only a problem for some mappings (e.g., XFS);
+ * mappings that do not support writeback (e.g., shmem) do not
+ * apply.
*/
- if (local_drain) {
- lru_add_drain_all();
- /* We give up here, and let the caller try again */
- return -EAGAIN;
+ if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+ !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+ folio_unlock(folio);
+ break;
}
+
/*
- * We are here if the page refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
+ * Ideally, we'd only trigger writeback on this exact folio. But
+ * there is no easy way to do that, so we stabilize the
+ * mapping while we still hold the folio lock, then drop the
+ * folio lock and trigger writeback on the range currently
+ * covered by the folio instead.
*/
- lru_add_drain();
- local_drain = true;
- /* And now we try again immediately after draining */
- goto again;
- } else if (rc == -ENXIO) {
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
+ mapping = folio->mapping;
+ lstart = folio_pos(folio);
+ lend = lstart + folio_size(folio) - 1;
+ inode = igrab(mapping->host);
+ folio_unlock(folio);
+
+ if (unlikely(!inode))
+ break;
+
+ filemap_write_and_wait_range(mapping, lstart, lend);
+ iput(mapping->host);
}
- return rc;
+ return -EAGAIN;
}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
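+/**
+ * make_hva_secure() - issue the UV call described by @uvcb on the page
+ * mapped at the given host virtual address.
+ * @mm: the mm containing the mapping
+ * @hva: host virtual address of the page to work on
+ * @uvcb: the UV control block describing the request
+ *
+ * Context: Must not be called while holding the mmap lock of @mm.
+ *
+ * Return: 0 on success;
+ *         -EFAULT if there is no VMA at @hva or the mapping is hugetlb backed;
+ *         -ENXIO if no page is mapped at @hva or it is not mapped writable;
+ *         -EAGAIN if the caller should retry, e.g. after a large folio was split;
+ *         otherwise an error code from splitting the folio or from the UV call.
+ */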
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
+ struct vm_area_struct *vma;
+ struct folio_walk fw;
+ struct folio *folio;
+ int rc;
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, hva);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ folio = folio_walk_start(&fw, vma, hva, 0);
+ if (!folio) {
+ mmap_read_unlock(mm);
+ return -ENXIO;
+ }
+
+ folio_get(folio);
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway, this will result in an -EFAULT for
+ * the unpack. The guest thus never reaches secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ rc = -EFAULT;
+ else if (folio_test_large(folio))
+ rc = -E2BIG;
+ else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+ rc = -ENXIO;
+ else
+ rc = make_folio_secure(mm, folio, uvcb);
+ folio_walk_end(&fw, vma);
+ mmap_read_unlock(mm);
+
+ if (rc == -E2BIG || rc == -EBUSY) {
+ rc = s390_wiggle_split_folio(mm, folio);
+ if (!rc)
+ rc = -EAGAIN;
+ }
+ folio_put(folio);
- return gmap_make_secure(gmap, gaddr, &uvcb);
+ return rc;
}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+EXPORT_SYMBOL_GPL(make_hva_secure);
/*
- * To be called with the page locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the page concurrently. Having 2
- * parallel make_page_accessible is fine, as the UV calls will become a
- * no-op if the page is already exported.
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having two parallel arch_make_folio_accessible() calls is fine, as the UV
+ * calls will become a no-op if the folio is already exported.
*/
-int arch_make_page_accessible(struct page *page)
+int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* Hugepage cannot be protected, so nothing to do */
- if (PageHuge(page))
+ /* Large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
return 0;
/*
- * PG_arch_1 is used in 3 places:
- * 1. for kernel page tables during early boot
- * 2. for storage keys of huge pages and KVM
- * 3. As an indication that this page might be secure. This can
+ * PG_arch_1 is used in 2 places:
+ * 1. For storage keys of hugetlb folios and KVM
+ * 2. As an indication that this small folio might be secure. This can
* overindicate, e.g. we set the bit before calling
* convert_to_secure.
- * As secure pages are never huge, all 3 variants can co-exists.
+ * As secure pages are never large folios, both variants can co-exist.
*/
- if (!test_bit(PG_arch_1, &page->flags))
+ if (!test_bit(PG_arch_1, &folio->flags))
return 0;
- rc = uv_pin_shared(page_to_phys(page));
+ rc = uv_pin_shared(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
return 0;
}
- rc = uv_convert_from_secure(page_to_phys(page));
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
return 0;
}
return rc;
}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
-
-#endif
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
- uv_info.inst_calls_list[0],
- uv_info.inst_calls_list[1],
- uv_info.inst_calls_list[2],
- uv_info.inst_calls_list[3]);
+ return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
}
static struct kobj_attribute uv_query_facilities_attr =
__ATTR(facilities, 0444, uv_query_facilities, NULL);
+static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
+ __ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
+ __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
+
+static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len);
+}
+
+static struct kobj_attribute uv_query_dump_cpu_len_attr =
+ __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
+
+static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len);
+}
+
+static struct kobj_attribute uv_query_dump_storage_state_len_attr =
+ __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
+
+static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len);
+}
+
+static struct kobj_attribute uv_query_dump_finalize_len_attr =
+ __ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
+
static ssize_t uv_query_feature_indications(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -402,69 +569,208 @@ static struct kobj_attribute uv_query_feature_indications_attr =
__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%d\n",
- uv_info.max_guest_cpu_id + 1);
+ return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1);
}
static struct kobj_attribute uv_query_max_guest_cpus_attr =
__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%d\n",
- uv_info.max_num_sec_conf);
+ return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf);
}
static struct kobj_attribute uv_query_max_guest_vms_attr =
__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- return scnprintf(page, PAGE_SIZE, "%lx\n",
- uv_info.max_sec_stor_addr);
+ return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr);
}
static struct kobj_attribute uv_query_max_guest_addr_attr =
__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver);
+}
+
+static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
+ __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
+
+static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags);
+}
+
+static struct kobj_attribute uv_query_supp_att_pflags_attr =
+ __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
+
+static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr =
+ __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL);
+
+static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf);
+}
+
+static struct kobj_attribute uv_query_supp_add_secret_pcf_attr =
+ __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL);
+
+static ssize_t uv_query_supp_secret_types(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types);
+}
+
+static struct kobj_attribute uv_query_supp_secret_types_attr =
+ __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL);
+
+static ssize_t uv_query_max_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n",
+ uv_info.max_assoc_secrets + uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_secrets_attr =
+ __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL);
+
+static ssize_t uv_query_max_retr_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_retr_secrets);
+}
+
+static struct kobj_attribute uv_query_max_retr_secrets_attr =
+ __ATTR(max_retr_secrets, 0444, uv_query_max_retr_secrets, NULL);
+
+static ssize_t uv_query_max_assoc_secrets(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", uv_info.max_assoc_secrets);
+}
+
+static struct kobj_attribute uv_query_max_assoc_secrets_attr =
+ __ATTR(max_assoc_secrets, 0444, uv_query_max_assoc_secrets, NULL);
+
static struct attribute *uv_query_attrs[] = {
&uv_query_facilities_attr.attr,
&uv_query_feature_indications_attr.attr,
&uv_query_max_guest_cpus_attr.attr,
&uv_query_max_guest_vms_attr.attr,
&uv_query_max_guest_addr_attr.attr,
+ &uv_query_supp_se_hdr_ver_attr.attr,
+ &uv_query_supp_se_hdr_pcf_attr.attr,
+ &uv_query_dump_storage_state_len_attr.attr,
+ &uv_query_dump_finalize_len_attr.attr,
+ &uv_query_dump_cpu_len_attr.attr,
+ &uv_query_supp_att_req_hdr_ver_attr.attr,
+ &uv_query_supp_att_pflags_attr.attr,
+ &uv_query_supp_add_secret_req_ver_attr.attr,
+ &uv_query_supp_add_secret_pcf_attr.attr,
+ &uv_query_supp_secret_types_attr.attr,
+ &uv_query_max_secrets_attr.attr,
+ &uv_query_max_assoc_secrets_attr.attr,
+ &uv_query_max_retr_secrets_attr.attr,
NULL,
};
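+/* Issue the Query Keys UVC; on error the key hashes simply remain zero. */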
+static inline struct uv_cb_query_keys uv_query_keys(void)
+{
+ struct uv_cb_query_keys uvcb = {
+ .header.cmd = UVC_CMD_QUERY_KEYS,
+ .header.len = sizeof(uvcb)
+ };
+
+ uv_call(0, (uint64_t)&uvcb);
+ return uvcb;
+}
+
+static inline ssize_t emit_hash(struct uv_key_hash *hash, char *buf, int at)
+{
+ return sysfs_emit_at(buf, at, "%016llx%016llx%016llx%016llx\n",
+ hash->dword[0], hash->dword[1], hash->dword[2], hash->dword[3]);
+}
+
+static ssize_t uv_keys_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_host_key_attr =
+ __ATTR(host_key, 0444, uv_keys_host_key, NULL);
+
+static ssize_t uv_keys_backup_host_key(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+
+ return emit_hash(&uvcb.key_hashes[UVC_QUERY_KEYS_IDX_BACK_HK], buf, 0);
+}
+
+static struct kobj_attribute uv_keys_backup_host_key_attr =
+ __ATTR(backup_host_key, 0444, uv_keys_backup_host_key, NULL);
+
+static ssize_t uv_keys_all(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uv_cb_query_keys uvcb = uv_query_keys();
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(uvcb.key_hashes); i++)
+ len += emit_hash(uvcb.key_hashes + i, buf, len);
+
+ return len;
+}
+
+static struct kobj_attribute uv_keys_all_attr =
+ __ATTR(all, 0444, uv_keys_all, NULL);
+
static struct attribute_group uv_query_attr_group = {
.attrs = uv_query_attrs,
};
+static struct attribute *uv_keys_attrs[] = {
+ &uv_keys_host_key_attr.attr,
+ &uv_keys_backup_host_key_attr.attr,
+ &uv_keys_all_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_keys_attr_group = {
+ .attrs = uv_keys_attrs,
+};
+
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
- val = prot_virt_guest;
-#endif
- return scnprintf(page, PAGE_SIZE, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_guest);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
- struct kobj_attribute *attr, char *page)
+ struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#if IS_ENABLED(CONFIG_KVM)
- val = prot_virt_host;
-#endif
-
- return scnprintf(page, PAGE_SIZE, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_host);
}
static struct kobj_attribute uv_prot_virt_guest =
@@ -480,9 +786,27 @@ static const struct attribute *uv_prot_virt_attrs[] = {
};
static struct kset *uv_query_kset;
+static struct kset *uv_keys_kset;
static struct kobject *uv_kobj;
-static int __init uv_info_init(void)
+static int __init uv_sysfs_dir_init(const struct attribute_group *grp,
+ struct kset **uv_dir_kset, const char *name)
+{
+ struct kset *kset;
+ int rc;
+
+ kset = kset_create_and_add(name, NULL, uv_kobj);
+ if (!kset)
+ return -ENOMEM;
+ *uv_dir_kset = kset;
+
+ rc = sysfs_create_group(&kset->kobj, grp);
+ if (rc)
+ kset_unregister(kset);
+ return rc;
+}
+
+static int __init uv_sysfs_init(void)
{
int rc = -ENOMEM;
@@ -497,17 +821,16 @@ static int __init uv_info_init(void)
if (rc)
goto out_kobj;
- uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
- if (!uv_query_kset) {
- rc = -ENOMEM;
+ rc = uv_sysfs_dir_init(&uv_query_attr_group, &uv_query_kset, "query");
+ if (rc)
goto out_ind_files;
- }
- rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
- if (!rc)
- return 0;
+ /* Get installed key hashes if available, ignore any errors */
+ if (test_bit_inv(BIT_UVC_CMD_QUERY_KEYS, uv_info.inst_calls_list))
+ uv_sysfs_dir_init(&uv_keys_attr_group, &uv_keys_kset, "keys");
+
+ return 0;
- kset_unregister(uv_query_kset);
out_ind_files:
sysfs_remove_files(uv_kobj, uv_prot_virt_attrs);
out_kobj:
@@ -515,5 +838,110 @@ out_kobj:
kobject_put(uv_kobj);
return rc;
}
-device_initcall(uv_info_init);
-#endif
+device_initcall(uv_sysfs_init);
+
+/*
+ * Locate a secret in the list by its id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Only the given list page is searched; the caller pages through the
+ * complete secret store.
+ *
+ * Context: might sleep.
+ */
+static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
+ const struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 i;
+
+ for (i = 0; i < list->total_num_secrets; i++) {
+ if (memcmp(secret_id, list->secrets[i].id, UV_SECRET_ID_LEN) == 0) {
+ *secret = list->secrets[i].hdr;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/*
+ * Search the complete Ultravisor secret store for a secret with the given id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Context: might sleep.
+ */
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
+{
+ u16 start_idx = 0;
+ u16 list_rc;
+ int ret;
+
+ do {
+ uv_list_secrets(list, start_idx, &list_rc, NULL);
+ if (list_rc != UVC_RC_EXECUTED && list_rc != UVC_RC_MORE_DATA) {
+ if (list_rc == UVC_RC_INV_CMD)
+ return -ENODEV;
+ else
+ return -EIO;
+ }
+ ret = find_secret_in_page(secret_id, list, secret);
+ if (ret == 0)
+ return ret;
+ start_idx = list->next_secret_idx;
+ } while (list_rc == UVC_RC_MORE_DATA && start_idx < list->next_secret_idx);
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(uv_find_secret);
+
+/**
+ * uv_retrieve_secret() - get the secret value for the secret index.
+ * @secret_idx: Secret index for which the secret should be retrieved.
+ * @buf: Buffer to store retrieved secret.
+ * @buf_size: Size of the buffer. The required buffer size is reported in the
+ *	secret metadata returned by uv_find_secret().
+ *
+ * Calls the Retrieve Secret UVC and translates the UV return code into an errno.
+ *
+ * Context: might sleep.
+ *
+ * Return:
+ * * %0 - Entry found; buffer contains a valid secret.
+ * * %ENOENT - No entry found or secret at the index is non-retrievable.
+ * * %ENODEV - Not supported: UV not available or command not available.
+ * * %EINVAL - Buffer too small for content.
+ * * %EIO - Other unexpected UV error.
+ */
+int uv_retrieve_secret(u16 secret_idx, u8 *buf, size_t buf_size)
+{
+ struct uv_cb_retr_secr uvcb = {
+ .header.len = sizeof(uvcb),
+ .header.cmd = UVC_CMD_RETR_SECRET,
+ .secret_idx = secret_idx,
+ .buf_addr = (u64)buf,
+ .buf_size = buf_size,
+ };
+
+ uv_call_sched(0, (u64)&uvcb);
+
+ switch (uvcb.header.rc) {
+ case UVC_RC_EXECUTED:
+ return 0;
+ case UVC_RC_INV_CMD:
+ return -ENODEV;
+ case UVC_RC_RETR_SECR_STORE_EMPTY:
+ case UVC_RC_RETR_SECR_INV_SECRET:
+ case UVC_RC_RETR_SECR_INV_IDX:
+ return -ENOENT;
+ case UVC_RC_RETR_SECR_BUF_SMALL:
+ return -EINVAL;
+ default:
+ return -EIO;
+ }
+}
+EXPORT_SYMBOL_GPL(uv_retrieve_secret);
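Illustration (not part of the patch above): a minimal sketch of how an in-kernel
consumer might combine uv_find_secret() and uv_retrieve_secret() as exported
here. The helper name example_fetch_secret, the metadata field names hdr.index
and hdr.length, and the allocation strategy are assumptions made for this sketch.

#include <linux/slab.h>
#include <asm/uv.h>

static int example_fetch_secret(const u8 id[UV_SECRET_ID_LEN],
				u8 *out, size_t out_size)
{
	struct uv_secret_list_item_hdr hdr;
	struct uv_secret_list *list;
	int rc;

	/* uv_find_secret() needs scratch space for one secret-list page */
	list = kzalloc(sizeof(*list), GFP_KERNEL);
	if (!list)
		return -ENOMEM;
	rc = uv_find_secret(id, list, &hdr);
	kfree(list);
	if (rc)
		return rc;			/* -ENOENT, -ENODEV or -EIO */
	if (hdr.length > out_size)		/* assumed metadata field names */
		return -EINVAL;
	return uv_retrieve_secret(hdr.index, out, hdr.length);
}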
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 5075cde77b29..430feb1a5013 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -12,126 +12,20 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
-#include <linux/time_namespace.h>
#include <linux/random.h>
+#include <linux/vdso_datastore.h>
#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
+#include <asm/alternative.h>
#include <asm/vdso.h>
extern char vdso64_start[], vdso64_end[];
extern char vdso32_start[], vdso32_end[];
-static struct vm_special_mapping vvar_mapping;
-
-static union {
- struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-
-struct vdso_data *vdso_data = vdso_data_store.data;
-
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
-#ifdef CONFIG_TIME_NS
-struct vdso_data *arch_get_vdso_data(void *vvar_page)
-{
- return (struct vdso_data *)(vvar_page);
-}
-
-static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
- if (likely(vma->vm_mm == current->mm))
- return current->nsproxy->time_ns->vvar_page;
- /*
- * VM_PFNMAP | VM_IO protect .fault() handler from being called
- * through interfaces like /proc/$pid/mem or
- * process_vm_{readv,writev}() as long as there's no .access()
- * in special_mapping_vmops().
- * For more details check_vma_flags() and __access_remote_vm()
- */
- WARN(1, "vvar_page accessed remotely");
- return NULL;
-}
-
-/*
- * The VVAR page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will be re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
-
- mmap_read_lock(mm);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- unsigned long size = vma->vm_end - vma->vm_start;
-
- if (!vma_is_special_mapping(vma, &vvar_mapping))
- continue;
- zap_page_range(vma, vma->vm_start, size);
- break;
- }
- mmap_read_unlock(mm);
- return 0;
-}
-#else
-static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
- return NULL;
-}
-#endif
-
-static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long addr, pfn;
- vm_fault_t err;
-
- switch (vmf->pgoff) {
- case VVAR_DATA_PAGE_OFFSET:
- pfn = virt_to_pfn(vdso_data);
- if (timens_page) {
- /*
- * Fault in VVAR page too, since it will be accessed
- * to get clock data anyway.
- */
- addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
- err = vmf_insert_pfn(vma, addr, pfn);
- if (unlikely(err & VM_FAULT_ERROR))
- return err;
- pfn = page_to_pfn(timens_page);
- }
- break;
-#ifdef CONFIG_TIME_NS
- case VVAR_TIMENS_PAGE_OFFSET:
- /*
- * If a task belongs to a time namespace then a namespace
- * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
- * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
- * offset.
- * See also the comment near timens_setup_vdso_data().
- */
- if (!timens_page)
- return VM_FAULT_SIGBUS;
- pfn = virt_to_pfn(vdso_data);
- break;
-#endif /* CONFIG_TIME_NS */
- default:
- return VM_FAULT_SIGBUS;
- }
- return vmf_insert_pfn(vma, vmf->address, pfn);
-}
-
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *vma)
{
@@ -139,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
-static struct vm_special_mapping vvar_mapping = {
- .name = "[vvar]",
- .fault = vvar_fault,
-};
-
static struct vm_special_mapping vdso64_mapping = {
.name = "[vdso]",
.mremap = vdso_mremap,
@@ -169,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
struct vm_area_struct *vma;
int rc;
- BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES);
if (mmap_write_lock_killable(mm))
return -EINTR;
@@ -184,17 +73,14 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
rc = vvar_start;
if (IS_ERR_VALUE(vvar_start))
goto out;
- vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
- VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
- VM_PFNMAP,
- &vvar_mapping);
+ vma = vdso_install_vvar_mapping(mm, vvar_start);
rc = PTR_ERR(vma);
if (IS_ERR(vma))
goto out;
- vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
+ vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE;
/* VM_MAYWRITE for COW so gdb can set breakpoints */
vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
- VM_READ|VM_EXEC|
+ VM_READ|VM_EXEC|VM_SEALED_SYSMAP|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_mapping);
if (IS_ERR(vma)) {
@@ -226,7 +112,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
end -= len;
if (end > start) {
- offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+ offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
addr = start + (offset << PAGE_SHIFT);
} else {
addr = start;
@@ -234,17 +120,22 @@ static unsigned long vdso_addr(unsigned long start, unsigned long len)
return addr;
}
-unsigned long vdso_size(void)
+unsigned long vdso_text_size(void)
{
- unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;
+ unsigned long size;
if (is_compat_task())
- size += vdso32_end - vdso32_start;
+ size = vdso32_end - vdso32_start;
else
- size += vdso64_end - vdso64_start;
+ size = vdso64_end - vdso64_start;
return PAGE_ALIGN(size);
}
+unsigned long vdso_size(void)
+{
+ return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE;
+}
+
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
unsigned long addr = VDSO_BASE;
@@ -269,8 +160,25 @@ static struct page ** __init vdso_setup_pages(void *start, void *end)
return pagelist;
}
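+/*
+ * Patch the 64-bit vDSO image in place during boot: locate its
+ * .altinstructions section and run the kernel's alternatives machinery over
+ * it, so that facility dependent code paths (such as the getrandom ChaCha20
+ * helper added below) are resolved before the image is mapped into user
+ * processes.
+ */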
+static void vdso_apply_alternatives(void)
+{
+ const struct elf64_shdr *alt, *shdr;
+ struct alt_instr *start, *end;
+ const struct elf64_hdr *hdr;
+
+ hdr = (struct elf64_hdr *)vdso64_start;
+ shdr = (void *)hdr + hdr->e_shoff;
+ alt = find_section(hdr, shdr, ".altinstructions");
+ if (!alt)
+ return;
+ start = (void *)hdr + alt->sh_offset;
+ end = (void *)hdr + alt->sh_offset + alt->sh_size;
+ apply_alternatives(start, end);
+}
+
static int __init vdso_init(void)
{
+ vdso_apply_alternatives();
vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
if (IS_ENABLED(CONFIG_COMPAT))
vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 245bddfe9bc0..1e4ddd1a683f 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -1,11 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso
-KCOV_INSTRUMENT := n
-ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
-ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
-
-include $(srctree)/lib/vdso/Makefile
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
obj-vdso32 = vdso_user_wrapper-32.o note-32.o
# Build rules
@@ -20,9 +17,12 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_32 += -m31 -s
KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_32 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin -fasynchronous-unwind-tables
-LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \
+LDFLAGS_vdso32.so.dbg += -shared -soname=linux-vdso32.so.1 \
--hash-style=both --build-id=sha1 -melf_s390 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
@@ -32,17 +32,14 @@ obj-y += vdso32_wrapper.o
targets += vdso32.lds
CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,ld)
+quiet_cmd_vdso_and_check = VDSO $@
+ cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
+$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE
+ $(call if_changed,vdso_and_check)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -58,18 +55,8 @@ quiet_cmd_vdso32as = VDSO32A $@
quiet_cmd_vdso32cc = VDSO32C $@
cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
-
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
index edf5ff1debe1..9630d58c2080 100644
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -6,18 +6,16 @@
#include <asm/page.h>
#include <asm/vdso.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
index 3f42f27f978c..2e645003fdaf 100644
--- a/arch/s390/kernel/vdso32/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S
@@ -1,12 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
#include <asm/unistd.h>
#include <asm/dwarf.h>
.macro vdso_syscall func,syscall
.globl __kernel_compat_\func
.type __kernel_compat_\func,@function
- .align 8
+ __ALIGN
__kernel_compat_\func:
CFI_STARTPROC
svc \syscall
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 9e2b95a222a9..d8f0df742809 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,17 +1,19 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso
-KCOV_INSTRUMENT := n
-ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
-ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
-
-include $(srctree)/lib/vdso/Makefile
-obj-vdso64 = vdso_user_wrapper.o note.o
-obj-cvdso64 = vdso64_generic.o getcpu.o
-VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
+obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
+obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
+VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
+CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
+endif
+
# Build rules
targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
@@ -22,11 +24,15 @@ KBUILD_AFLAGS += -DBUILD_VDSO
KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_64 += -m64 -s
+KBUILD_AFLAGS_64 += -m64
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \
+KBUILD_CFLAGS_64 := $(filter-out -mpacked-stack,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -munaligned-symbols,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 := $(filter-out -fno-asynchronous-unwind-tables,$(KBUILD_CFLAGS_64))
+KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin -fasynchronous-unwind-tables
+ldflags-y := -shared -soname=linux-vdso64.so.1 \
--hash-style=both --build-id=sha1 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
@@ -36,18 +42,15 @@ obj-y += vdso64_wrapper.o
targets += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+quiet_cmd_vdso_and_check = VDSO $@
+ cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
+
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
- $(call if_changed,ld)
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
+ $(call if_changed,vdso_and_check)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -67,18 +70,8 @@ quiet_cmd_vdso64as = VDSO64A $@
quiet_cmd_vdso64cc = VDSO64C $@
cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso64.so
-
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h
index 34c7a2312f9d..9e5397e7b590 100644
--- a/arch/s390/kernel/vdso64/vdso.h
+++ b/arch/s390/kernel/vdso64/vdso.h
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
index 4461ea151e49..e4f6551ae898 100644
--- a/arch/s390/kernel/vdso64/vdso64.lds.S
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -4,20 +4,19 @@
* library
*/
+#include <asm/vdso/vsyscall.h>
#include <asm/page.h>
#include <asm/vdso.h>
+#include <vdso/datapage.h>
OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
OUTPUT_ARCH(s390:64-bit)
-ENTRY(_start)
SECTIONS
{
- PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
-#ifdef CONFIG_TIME_NS
- PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
-#endif
- . = VDSO_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -43,6 +42,10 @@ SECTIONS
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
+ . = ALIGN(8);
+ .altinstructions : { *(.altinstructions) }
+ .altinstr_replacement : { *(.altinstr_replacement) }
+
.dynamic : { *(.dynamic) } :text :dynamic
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
@@ -141,6 +144,7 @@ VERSION
__kernel_restart_syscall;
__kernel_rt_sigreturn;
__kernel_sigreturn;
+ __kernel_getrandom;
local: *;
};
}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 97f0c0a669a5..aa06c85bcbd3 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -1,12 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
#include <asm/vdso.h>
#include <asm/unistd.h>
#include <asm/asm-offsets.h>
#include <asm/dwarf.h>
#include <asm/ptrace.h>
-#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
-
/*
* Older glibc version called vdso without allocating a stackframe. This wrapper
* is just used to allocate a stackframe. See
@@ -14,23 +13,23 @@
* for details.
*/
.macro vdso_func func
- .globl __kernel_\func
- .type __kernel_\func,@function
- .align 8
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
- aghi %r15,-WRAPPER_FRAME_SIZE
- CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ aghi %r15,-STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET (STACK_FRAME_USER_OVERHEAD + STACK_FRAME_VDSO_OVERHEAD)
+ CFI_VAL_OFFSET 15,-STACK_FRAME_USER_OVERHEAD
+ stg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_REL_OFFSET 14,__SFVDSO_RETURN_ADDRESS
+ xc __SFUSER_BACKCHAIN(8,%r15),__SFUSER_BACKCHAIN(%r15)
brasl %r14,__s390_vdso_\func
- lg %r14,STACK_FRAME_OVERHEAD(%r15)
- aghi %r15,WRAPPER_FRAME_SIZE
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ lg %r14,__SFVDSO_RETURN_ADDRESS(%r15)
+ CFI_RESTORE 14
+ aghi %r15,STACK_FRAME_VDSO_OVERHEAD
+ CFI_DEF_CFA_OFFSET STACK_FRAME_USER_OVERHEAD
CFI_RESTORE 15
br %r14
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_func gettimeofday
@@ -39,16 +38,13 @@ vdso_func clock_gettime
vdso_func getcpu
.macro vdso_syscall func,syscall
- .globl __kernel_\func
- .type __kernel_\func,@function
- .align 8
-__kernel_\func:
+SYM_FUNC_START(__kernel_\func)
CFI_STARTPROC
svc \syscall
/* Make sure we notice when a syscall returns, which shouldn't happen */
.word 0
CFI_ENDPROC
- .size __kernel_\func,.-__kernel_\func
+SYM_FUNC_END(__kernel_\func)
.endm
vdso_syscall restart_syscall,__NR_restart_syscall
diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
new file mode 100644
index 000000000000..09c034c2f853
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/alternative.h>
+#include <asm/dwarf.h>
+#include <asm/fpu-insn.h>
+
+#define STATE0 %v0
+#define STATE1 %v1
+#define STATE2 %v2
+#define STATE3 %v3
+#define COPY0 %v4
+#define COPY1 %v5
+#define COPY2 %v6
+#define COPY3 %v7
+#define BEPERM %v19
+#define TMP0 %v20
+#define TMP1 %v21
+#define TMP2 %v22
+#define TMP3 %v23
+
+ .section .rodata
+
+ .balign 32
+SYM_DATA_START_LOCAL(chacha20_constants)
+ .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
+ .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
+SYM_DATA_END(chacha20_constants)
+
+ .text
+/*
+ * s390 ChaCha20 implementation meant for vDSO. Produces a given positive
+ * number of blocks of output with nonce 0, taking an input key and an 8-byte
+ * counter. Does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ * const uint8_t *key,
+ * uint32_t *counter,
+ * size_t nblocks)
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+ CFI_STARTPROC
+ larl %r1,chacha20_constants
+
+ /* COPY0 = "expand 32-byte k" */
+ VL COPY0,0,,%r1
+
+ /* BEPERM = byte selectors for VPERM */
+ ALTERNATIVE __stringify(VL BEPERM,16,,%r1), "brcl 0,0", ALT_FACILITY(148)
+
+ /* COPY1,COPY2 = key */
+ VLM COPY1,COPY2,0,%r3
+
+ /* COPY3 = counter || zero nonce */
+ lg %r3,0(%r4)
+ VZERO COPY3
+ VLVGG COPY3,%r3,0
+
+ lghi %r1,0
+.Lblock:
+ VLR STATE0,COPY0
+ VLR STATE1,COPY1
+ VLR STATE2,COPY2
+ VLR STATE3,COPY3
+
+ lghi %r0,10
+.Ldoubleround:
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
+ VSLDB STATE1,STATE1,STATE1,4
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
+ VSLDB STATE3,STATE3,STATE3,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,16
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,12
+
+ /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
+ VAF STATE0,STATE0,STATE1
+ VX STATE3,STATE3,STATE0
+ VERLLF STATE3,STATE3,8
+
+ /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
+ VAF STATE2,STATE2,STATE3
+ VX STATE1,STATE1,STATE2
+ VERLLF STATE1,STATE1,7
+
+ /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
+ VSLDB STATE1,STATE1,STATE1,12
+ /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
+ VSLDB STATE2,STATE2,STATE2,8
+ /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
+ VSLDB STATE3,STATE3,STATE3,4
+ brctg %r0,.Ldoubleround
+
+ /* OUTPUT0 = STATE0 + COPY0 */
+ VAF STATE0,STATE0,COPY0
+ /* OUTPUT1 = STATE1 + COPY1 */
+ VAF STATE1,STATE1,COPY1
+ /* OUTPUT2 = STATE2 + COPY2 */
+ VAF STATE2,STATE2,COPY2
+ /* OUTPUT3 = STATE3 + COPY3 */
+ VAF STATE3,STATE3,COPY3
+
+ ALTERNATIVE \
+ __stringify( \
+ /* Convert STATE to little endian and store to OUTPUT */\
+ VPERM TMP0,STATE0,STATE0,BEPERM; \
+ VPERM TMP1,STATE1,STATE1,BEPERM; \
+ VPERM TMP2,STATE2,STATE2,BEPERM; \
+ VPERM TMP3,STATE3,STATE3,BEPERM; \
+ VSTM TMP0,TMP3,0,%r2), \
+ __stringify( \
+ /* 32 bit wise little endian store to OUTPUT */ \
+ VSTBRF STATE0,0,,%r2; \
+ VSTBRF STATE1,16,,%r2; \
+ VSTBRF STATE2,32,,%r2; \
+ VSTBRF STATE3,48,,%r2; \
+ brcl 0,0), \
+ ALT_FACILITY(148)
+
+ /* ++COPY3.COUNTER */
+ /* alsih %r3,1 */
+ .insn rilu,0xcc0a00000000,%r3,1
+ alcr %r3,%r1
+ VLVGG COPY3,%r3,0
+
+ /* OUTPUT += 64, --NBLOCKS */
+ aghi %r2,64
+ brctg %r5,.Lblock
+
+ /* COUNTER = COPY3.COUNTER */
+ stg %r3,0(%r4)
+
+ /* Zero out potentially sensitive regs */
+ VZERO STATE0
+ VZERO STATE1
+ VZERO STATE2
+ VZERO STATE3
+ VZERO COPY1
+ VZERO COPY2
+
+ /* Early exit if TMP0-TMP3 have not been used */
+ ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
+
+ VZERO TMP0
+ VZERO TMP1
+ VZERO TMP2
+ VZERO TMP3
+
+ br %r14
+ CFI_ENDPROC
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso64/vgetrandom.c
new file mode 100644
index 000000000000..b5268b507fb5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/facility.h>
+#include <uapi/asm-generic/errno.h>
+#include "vdso.h"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+ if (test_facility(129))
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
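+	/*
+	 * Without the vector facility the vDSO cannot accelerate getrandom.
+	 * Answer the opaque-state size probe (opaque_len == ~0UL) with
+	 * -ENOSYS so that callers fall back to the getrandom() syscall.
+	 */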
+ if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
+ return -ENOSYS;
+ return getrandom_syscall(buffer, len, flags);
+}
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
new file mode 100644
index 000000000000..cc8933e04ff7
--- /dev/null
+++ b/arch/s390/kernel/vmcore_info.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <linux/mm.h>
+#include <asm/abs_lowcore.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ struct lowcore *abs_lc;
+
+ VMCOREINFO_SYMBOL(lowcore_ptr);
+ VMCOREINFO_SYMBOL(high_memory);
+ VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+ vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+ vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+ vmcoreinfo_append_str("IDENTITYBASE=%lx\n", __identity_base);
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("KERNELOFFPHYS=%lx\n", __kaslr_offset_phys);
+ abs_lc = get_abs_lowcore();
+ abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+ put_abs_lowcore(abs_lc);
+}
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 2e526f11b91e..ff1ddba96352 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -14,9 +14,13 @@
#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \
*(.bss..invalid_pg_dir)
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
/* Handle ro_after_init data on our own. */
#define RO_AFTER_INIT_DATA
+#define RUNTIME_DISCARD_EXIT
+
#define EMITS_PT_NOTE
#include <asm-generic/vmlinux.lds.h>
@@ -35,14 +39,13 @@ PHDRS {
SECTIONS
{
- . = 0x100000;
+ . = TEXT_OFFSET;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */
HEAD_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
@@ -65,11 +68,22 @@ SECTIONS
*(.data..ro_after_init)
JUMP_TABLE_DATA
} :data
- EXCEPTION_TABLE(16)
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
+ .data.rel.ro : {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+ .got : {
+ __got_start = .;
+ *(.got)
+ __got_end = .;
+ }
+
RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
+ .data.rel : {
+ *(.data.rel*)
+ }
BOOT_DATA_PRESERVED
. = ALIGN(8);
@@ -79,6 +93,7 @@ SECTIONS
_end_amode31_refs = .;
}
+ . = ALIGN(PAGE_SIZE);
_edata = .; /* End of data section */
/* will be freed after init */
@@ -131,6 +146,7 @@ SECTIONS
/*
* Table with the patch locations to undo expolines
*/
+ . = ALIGN(4);
.nospec_call_table : {
__nospec_call_start = . ;
*(.s390_indirect*)
@@ -167,31 +183,23 @@ SECTIONS
.amode31.data : {
*(.amode31.data)
}
- . = ALIGN(PAGE_SIZE);
+ . = _samode31 + AMODE31_SIZE;
_eamode31 = .;
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
- PERCPU_SECTION(0x100)
+ RUNTIME_CONST_VARIABLES
- .dynsym ALIGN(8) : {
- __dynsym_start = .;
- *(.dynsym)
- __dynsym_end = .;
- }
- .rela.dyn ALIGN(8) : {
- __rela_dyn_start = .;
- *(.rela*)
- __rela_dyn_end = .;
- }
+ PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
_end = . ;
/*
@@ -199,7 +207,6 @@ SECTIONS
* it should match struct vmlinux_info
*/
.vmlinux.info 0 (INFO) : {
- QUAD(_stext) /* default_lma */
QUAD(startup_continue) /* entry */
QUAD(__bss_start - _stext) /* image_size */
QUAD(__bss_stop - __bss_start) /* bss_size */
@@ -208,10 +215,21 @@ SECTIONS
QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
QUAD(__boot_data_preserved_end -
__boot_data_preserved_start) /* bootdata_preserved_size */
- QUAD(__dynsym_start) /* dynsym_start */
- QUAD(__rela_dyn_start) /* rela_dyn_start */
- QUAD(__rela_dyn_end) /* rela_dyn_end */
+ QUAD(__got_start) /* got_start */
+ QUAD(__got_end) /* got_end */
QUAD(_eamode31 - _samode31) /* amode31_size */
+ QUAD(init_mm)
+ QUAD(swapper_pg_dir)
+ QUAD(invalid_pg_dir)
+ QUAD(__alt_instructions)
+ QUAD(__alt_instructions_end)
+#ifdef CONFIG_KASAN
+ QUAD(kasan_early_shadow_page)
+ QUAD(kasan_early_shadow_pte)
+ QUAD(kasan_early_shadow_pmd)
+ QUAD(kasan_early_shadow_pud)
+ QUAD(kasan_early_shadow_p4d)
+#endif
} :NONE
/* Debugging sections. */
@@ -219,9 +237,32 @@ SECTIONS
DWARF_DEBUG
ELF_DETAILS
+ /*
+	 * Make sure that the .got.plt is either completely empty or
+	 * contains only the three reserved doublewords.
+ */
+ .got.plt : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 || SIZEOF(.got.plt) == 0x18, "Unexpected GOT/PLT entries detected!")
+
+ /*
+	 * Sections that should stay zero sized; it is safer to check
+	 * this explicitly than to discard them blindly.
+ */
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+
/* Sections to be discarded */
DISCARDS
/DISCARD/ : {
*(.eh_frame)
+ *(.interp)
}
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9436f3053b88..234a0ba30510 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -7,13 +7,13 @@
*/
#include <linux/kernel_stat.h>
-#include <linux/sched/cputime.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
#include <asm/alternative.h>
+#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/cpu_mf.h>
@@ -33,24 +33,17 @@ static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
-static inline u64 get_vtimer(void)
-{
- u64 timer;
-
- asm volatile("stpt %0" : "=Q" (timer));
- return timer;
-}
-
static inline void set_vtimer(u64 expires)
{
+ struct lowcore *lc = get_lowcore();
u64 timer;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
: "=Q" (timer) : "Q" (expires));
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
+ lc->system_timer += lc->last_update_timer - timer;
+ lc->last_update_timer = expires;
}
static inline int virt_timer_forward(u64 elapsed)
@@ -125,22 +118,23 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
static int do_account_vtime(struct task_struct *tsk)
{
u64 timer, clock, user, guest, system, hardirq, softirq;
+ struct lowcore *lc = get_lowcore();
- timer = S390_lowcore.last_update_timer;
- clock = S390_lowcore.last_update_clock;
+ timer = lc->last_update_timer;
+ clock = lc->last_update_clock;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" stckf %1" /* Store current tod clock value */
- : "=Q" (S390_lowcore.last_update_timer),
- "=Q" (S390_lowcore.last_update_clock)
+ : "=Q" (lc->last_update_timer),
+ "=Q" (lc->last_update_clock)
: : "cc");
- clock = S390_lowcore.last_update_clock - clock;
- timer -= S390_lowcore.last_update_timer;
+ clock = lc->last_update_clock - clock;
+ timer -= lc->last_update_timer;
if (hardirq_count())
- S390_lowcore.hardirq_timer += timer;
+ lc->hardirq_timer += timer;
else
- S390_lowcore.system_timer += timer;
+ lc->system_timer += timer;
/* Update MT utilization calculation */
if (smp_cpu_mtid &&
@@ -149,16 +143,16 @@ static int do_account_vtime(struct task_struct *tsk)
/* Calculate cputime delta */
user = update_tsk_timer(&tsk->thread.user_timer,
- READ_ONCE(S390_lowcore.user_timer));
+ READ_ONCE(lc->user_timer));
guest = update_tsk_timer(&tsk->thread.guest_timer,
- READ_ONCE(S390_lowcore.guest_timer));
+ READ_ONCE(lc->guest_timer));
system = update_tsk_timer(&tsk->thread.system_timer,
- READ_ONCE(S390_lowcore.system_timer));
+ READ_ONCE(lc->system_timer));
hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
- READ_ONCE(S390_lowcore.hardirq_timer));
+ READ_ONCE(lc->hardirq_timer));
softirq = update_tsk_timer(&tsk->thread.softirq_timer,
- READ_ONCE(S390_lowcore.softirq_timer));
- S390_lowcore.steal_timer +=
+ READ_ONCE(lc->softirq_timer));
+ lc->steal_timer +=
clock - user - guest - system - hardirq - softirq;
/* Push account value */
@@ -184,17 +178,19 @@ static int do_account_vtime(struct task_struct *tsk)
void vtime_task_switch(struct task_struct *prev)
{
+ struct lowcore *lc = get_lowcore();
+
do_account_vtime(prev);
- prev->thread.user_timer = S390_lowcore.user_timer;
- prev->thread.guest_timer = S390_lowcore.guest_timer;
- prev->thread.system_timer = S390_lowcore.system_timer;
- prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
- prev->thread.softirq_timer = S390_lowcore.softirq_timer;
- S390_lowcore.user_timer = current->thread.user_timer;
- S390_lowcore.guest_timer = current->thread.guest_timer;
- S390_lowcore.system_timer = current->thread.system_timer;
- S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
- S390_lowcore.softirq_timer = current->thread.softirq_timer;
+ prev->thread.user_timer = lc->user_timer;
+ prev->thread.guest_timer = lc->guest_timer;
+ prev->thread.system_timer = lc->system_timer;
+ prev->thread.hardirq_timer = lc->hardirq_timer;
+ prev->thread.softirq_timer = lc->softirq_timer;
+ lc->user_timer = current->thread.user_timer;
+ lc->guest_timer = current->thread.guest_timer;
+ lc->system_timer = current->thread.system_timer;
+ lc->hardirq_timer = current->thread.hardirq_timer;
+ lc->softirq_timer = current->thread.softirq_timer;
}
/*
@@ -204,28 +200,29 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_flush(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 steal, avg_steal;
if (do_account_vtime(tsk))
virt_timer_expire();
- steal = S390_lowcore.steal_timer;
- avg_steal = S390_lowcore.avg_steal_timer / 2;
+ steal = lc->steal_timer;
+ avg_steal = lc->avg_steal_timer;
if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
+ lc->steal_timer = 0;
account_steal_time(cputime_to_nsecs(steal));
avg_steal += steal;
}
- S390_lowcore.avg_steal_timer = avg_steal;
+ lc->avg_steal_timer = avg_steal / 2;
}
static u64 vtime_delta(void)
{
- u64 timer = S390_lowcore.last_update_timer;
-
- S390_lowcore.last_update_timer = get_vtimer();
+ struct lowcore *lc = get_lowcore();
+ u64 timer = lc->last_update_timer;
- return timer - S390_lowcore.last_update_timer;
+ lc->last_update_timer = get_cpu_timer();
+ return timer - lc->last_update_timer;
}
/*
@@ -234,12 +231,13 @@ static u64 vtime_delta(void)
*/
void vtime_account_kernel(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 delta = vtime_delta();
if (tsk->flags & PF_VCPU)
- S390_lowcore.guest_timer += delta;
+ lc->guest_timer += delta;
else
- S390_lowcore.system_timer += delta;
+ lc->system_timer += delta;
virt_timer_forward(delta);
}
@@ -249,7 +247,7 @@ void vtime_account_softirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.softirq_timer += delta;
+ get_lowcore()->softirq_timer += delta;
virt_timer_forward(delta);
}
@@ -258,7 +256,7 @@ void vtime_account_hardirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.hardirq_timer += delta;
+ get_lowcore()->hardirq_timer += delta;
virt_timer_forward(delta);
}
diff --git a/arch/s390/kernel/wti.c b/arch/s390/kernel/wti.c
new file mode 100644
index 000000000000..949fdbf0e8b6
--- /dev/null
+++ b/arch/s390/kernel/wti.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for warning track interruption
+ *
+ * Copyright IBM Corp. 2023
+ */
+
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/smpboot.h>
+#include <linux/irq.h>
+#include <uapi/linux/sched/types.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/sclp.h>
+
+#define WTI_DBF_LEN 64
+
+struct wti_debug {
+ unsigned long missed;
+ unsigned long addr;
+ pid_t pid;
+};
+
+struct wti_state {
+ /* debug data for s390dbf */
+ struct wti_debug dbg;
+ /*
+	 * Represents the real-time thread responsible for acknowledging
+	 * the warning-track interrupt and for taking the necessary
+	 * precautions before and after the grace period.
+ */
+ struct task_struct *thread;
+ /*
+ * If pending is true, the real-time thread must be scheduled.
+	 * If not, waking up that thread remains a no-op.
+ */
+ bool pending;
+};
+
+static DEFINE_PER_CPU(struct wti_state, wti_state);
+
+static debug_info_t *wti_dbg;
+
+/*
+ * During a warning-track grace period, interrupts are disabled
+ * to prevent delays of the warning-track acknowledgment.
+ *
+ * Once the CPU is physically dispatched again, interrupts are
+ * re-enabled.
+ */
+
+static void wti_irq_disable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* disable all I/O interrupts */
+ cr6.val &= ~0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void wti_irq_enable(void)
+{
+ unsigned long flags;
+ struct ctlreg cr6;
+
+ local_irq_save(flags);
+ local_ctl_store(6, &cr6);
+ /* enable all I/O interrupts */
+ cr6.val |= 0xff000000UL;
+ local_ctl_load(6, &cr6);
+ local_irq_restore(flags);
+}
+
+static void store_debug_data(struct wti_state *st)
+{
+ struct pt_regs *regs = get_irq_regs();
+
+ st->dbg.pid = current->pid;
+ st->dbg.addr = 0;
+ if (!user_mode(regs))
+ st->dbg.addr = regs->psw.addr;
+}
+
+static void wti_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct wti_state *st = this_cpu_ptr(&wti_state);
+
+ inc_irq_stat(IRQEXT_WTI);
+ wti_irq_disable();
+ store_debug_data(st);
+ st->pending = true;
+ wake_up_process(st->thread);
+}
+
+static int wti_pending(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ return st->pending;
+}
+
+static void wti_dbf_grace_period(struct wti_state *st)
+{
+ struct wti_debug *wdi = &st->dbg;
+ char buf[WTI_DBF_LEN];
+
+ if (wdi->addr)
+ snprintf(buf, sizeof(buf), "%d %pS", wdi->pid, (void *)wdi->addr);
+ else
+ snprintf(buf, sizeof(buf), "%d <user>", wdi->pid);
+ debug_text_event(wti_dbg, 2, buf);
+ wdi->missed++;
+}
+
+static int wti_show(struct seq_file *seq, void *v)
+{
+ struct wti_state *st;
+ int cpu;
+
+ cpus_read_lock();
+ seq_puts(seq, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(seq, "CPU%-8d", cpu);
+ seq_putc(seq, '\n');
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ seq_printf(seq, " %10lu", st->dbg.missed);
+ }
+ seq_putc(seq, '\n');
+ cpus_read_unlock();
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(wti);
+
+static void wti_thread_fn(unsigned int cpu)
+{
+ struct wti_state *st = per_cpu_ptr(&wti_state, cpu);
+
+ st->pending = false;
+ /*
+	 * Yield the CPU voluntarily to the hypervisor. Control
+	 * resumes when the hypervisor dispatches the CPU to this
+	 * LPAR again.
+ */
+ if (diag49c(DIAG49C_SUBC_ACK))
+ wti_dbf_grace_period(st);
+ wti_irq_enable();
+}
+
+static struct smp_hotplug_thread wti_threads = {
+ .store = &wti_state.thread,
+ .thread_should_run = wti_pending,
+ .thread_fn = wti_thread_fn,
+ .thread_comm = "cpuwti/%u",
+ .selfparking = false,
+};
+
+static int __init wti_init(void)
+{
+ struct sched_param wti_sched_param = { .sched_priority = MAX_RT_PRIO - 1 };
+ struct dentry *wti_dir;
+ struct wti_state *st;
+ int cpu, rc;
+
+ rc = -EOPNOTSUPP;
+ if (!sclp.has_wti)
+ goto out;
+ rc = smpboot_register_percpu_thread(&wti_threads);
+ if (WARN_ON(rc))
+ goto out;
+ for_each_online_cpu(cpu) {
+ st = per_cpu_ptr(&wti_state, cpu);
+ sched_setscheduler(st->thread, SCHED_FIFO, &wti_sched_param);
+ }
+ rc = register_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+ if (rc) {
+ pr_warn("Couldn't request external interrupt 0x1007\n");
+ goto out_thread;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_WARNING_TRACK);
+ rc = diag49c(DIAG49C_SUBC_REG);
+ if (rc) {
+ pr_warn("Failed to register warning track interrupt through DIAG 49C\n");
+ rc = -EOPNOTSUPP;
+ goto out_subclass;
+ }
+ wti_dir = debugfs_create_dir("wti", arch_debugfs_dir);
+ debugfs_create_file("stat", 0400, wti_dir, NULL, &wti_fops);
+ wti_dbg = debug_register("wti", 1, 1, WTI_DBF_LEN);
+ if (!wti_dbg) {
+ rc = -ENOMEM;
+ goto out_debug_register;
+ }
+ rc = debug_register_view(wti_dbg, &debug_hex_ascii_view);
+ if (rc)
+ goto out_debug_register;
+ goto out;
+out_debug_register:
+ debug_unregister(wti_dbg);
+out_subclass:
+ irq_subclass_unregister(IRQ_SUBCLASS_WARNING_TRACK);
+ unregister_external_irq(EXT_IRQ_WARNING_TRACK, wti_interrupt);
+out_thread:
+ smpboot_unregister_percpu_thread(&wti_threads);
+out:
+ return rc;
+}
+late_initcall(wti_init);