From 914d52e46490b6599b7f03fad233f4f19bf23cf7 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 4 Jul 2019 16:18:15 +0200
Subject: s390: implement perf_arch_fetch_caller_regs

On s390 bpf_get_stack_raw_tp() returns 0 entries for both kernel and
user stacks. While there is no practical unwinding solution for userspace
on s390 at this moment, there certainly is a kernel unwinder. However,
it is not properly integrated with BPF.

In order to start unwinding, bpf_get_stack_raw_tp() obtains the current
kernel register values using perf_fetch_caller_regs(), which is not
implemented for s390. The actual unwinding then happens by passing those
registers to perf_callchain_kernel().

Implement perf_arch_fetch_caller_regs() for s390, where
__builtin_frame_address(0) points to back_chain.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/perf_event.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 4652ffffe0b2..b9da71632827 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -12,6 +12,7 @@
 
 #include <linux/perf_event.h>
 #include <linux/device.h>
+#include <asm/stacktrace.h>
 
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
@@ -73,4 +74,10 @@ struct perf_sf_sde_regs {
 #define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
 #define SAMPLE_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
+#define perf_arch_fetch_caller_regs(regs, __ip) do {			\
+	(regs)->psw.addr = (__ip);					\
+	(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) -	\
+		offsetof(struct stack_frame, back_chain);		\
+} while (0)
+
 #endif /* _ASM_S390_PERF_EVENT_H */
-- 
cgit v1.2.3-59-g8ed1b


From a2308c11ecbc3471ebb7435ee8075815b1502ef0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Nov 2019 13:09:52 +0100
Subject: s390/smp,vdso: fix ASCE handling

When a secondary CPU is brought up it must initialize its control
registers. CPU A which triggers that a secondary CPU B is brought up
stores its control register contents into the lowcore of new CPU B,
which then loads these values on startup.

This is problematic in various ways: the control register which
contains the home space ASCE will correctly contain the kernel ASCE;
however control registers for primary and secondary ASCEs are
initialized with whatever values were present in CPU A.

Typically:
- the primary ASCE will contain the user process ASCE of the process
  that triggered onlining of CPU B.
- the secondary ASCE will contain the percpu VDSO ASCE of CPU A.

Due to lazy ASCE handling we may also end up with other combinations.

When CPU B then switches to a different process (!= idle) it will
fixup the primary ASCE. However the problem is that the (wrong) ASCE
from CPU A was loaded into control register 1: as soon as an ASCE is
attached (aka loaded) a CPU is free to generate TLB entries using that
address space.
Even though it is very unlikely that CPU B will actually generate such
entries, this could result in TLB entries of the address space of the
process that ran on CPU A. These entries shouldn't exist at all and
could cause problems later on.

Furthermore the secondary ASCE of CPU B will not be updated correctly.
This means that processes may see wrong results or even crash if they
access VDSO data on CPU B. The correct VDSO ASCE will eventually be
loaded on return to user space as soon as the kernel executed a call
to strnlen_user or an atomic futex operation on CPU B.

Fix both issues by initializing the to-be-loaded control register
contents with the correct ASCEs and also enforce (re-)loading of the
ASCEs upon first context switch and return to user space.

Fixes: 0aaba41b58bc ("s390: remove all code using the access register mode")
Cc: stable@vger.kernel.org # v4.15+
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/smp.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6acdcf1d4074..06dddd7c4290 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -262,10 +262,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 	lc->spinlock_index = 0;
 	lc->percpu_offset = __per_cpu_offset[cpu];
 	lc->kernel_asce = S390_lowcore.kernel_asce;
+	lc->user_asce = S390_lowcore.kernel_asce;
 	lc->machine_flags = S390_lowcore.machine_flags;
 	lc->user_timer = lc->system_timer =
 		lc->steal_timer = lc->avg_steal_timer = 0;
 	__ctl_store(lc->cregs_save_area, 0, 15);
+	lc->cregs_save_area[1] = lc->kernel_asce;
+	lc->cregs_save_area[7] = lc->vdso_asce;
 	save_access_regs((unsigned int *) lc->access_regs_save_area);
 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
 	       sizeof(lc->stfle_fac_list));
@@ -844,6 +847,8 @@ static void smp_init_secondary(void)
 
 	S390_lowcore.last_update_clock = get_tod_clock();
 	restore_access_regs(S390_lowcore.access_regs_save_area);
+	set_cpu_flag(CIF_ASCE_PRIMARY);
+	set_cpu_flag(CIF_ASCE_SECONDARY);
 	cpu_init();
 	preempt_disable();
 	init_cpu_timer();
-- 
cgit v1.2.3-59-g8ed1b


From 5a5525b0488ce31e19065f8527dbf50266b5b712 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Nov 2019 09:38:37 +0100
Subject: s390/vdso: fix getcpu

getcpu reads the required values for cpu and node with two
instructions. This might lead to an inconsistent result if user space
gets preempted and migrated to a different CPU between the two
instructions.

Fix this by using just a single instruction to read both values at
once.

This is currently rather a theoretical bug, since there is no real
NUMA support available (except for NUMA emulation).

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/vdso.h     | 13 +++++++++++--
 arch/s390/kernel/asm-offsets.c   |  3 +--
 arch/s390/kernel/vdso32/getcpu.S |  4 +---
 arch/s390/kernel/vdso64/getcpu.S |  4 +---
 4 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 169d7604eb80..3bcfdeb01395 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -41,8 +41,17 @@ struct vdso_data {
 struct vdso_per_cpu_data {
 	__u64 ectg_timer_base;
 	__u64 ectg_user_time;
-	__u32 cpu_nr;
-	__u32 node_id;
+	/*
+	 * Note: node_id and cpu_nr must be at adjacent memory locations.
+	 * VDSO userspace must read both values with a single instruction.
+	 */
+	union {
+		__u64 getcpu_val;
+		struct {
+			__u32 node_id;
+			__u32 cpu_nr;
+		};
+	};
 };
 
 extern struct vdso_data *vdso_data;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 41ac4ad21311..ce33406cfe83 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -78,8 +78,7 @@ int main(void)
 	OFFSET(__VDSO_TS_END, vdso_data, ts_end);
 	OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
 	OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
-	OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
-	OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
+	OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
 	BLANK();
 	/* constants used by the vdso */
 	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
index 25515f3fbcea..dc79e169f0ad 100644
--- a/arch/s390/kernel/vdso32/getcpu.S
+++ b/arch/s390/kernel/vdso32/getcpu.S
@@ -16,10 +16,8 @@
 	.type  __kernel_getcpu,@function
 __kernel_getcpu:
 	CFI_STARTPROC
-	la	%r4,0
 	sacf	256
-	l	%r5,__VDSO_CPU_NR(%r4)
-	l	%r4,__VDSO_NODE_ID(%r4)
+	lm	%r4,%r5,__VDSO_GETCPU_VAL(%r0)
 	sacf	0
 	ltr	%r2,%r2
 	jz	2f
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
index 2446e9dac8ab..3c04f7328500 100644
--- a/arch/s390/kernel/vdso64/getcpu.S
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -16,10 +16,8 @@
 	.type  __kernel_getcpu,@function
 __kernel_getcpu:
 	CFI_STARTPROC
-	la	%r4,0
 	sacf	256
-	l	%r5,__VDSO_CPU_NR(%r4)
-	l	%r4,__VDSO_NODE_ID(%r4)
+	lm	%r4,%r5,__VDSO_GETCPU_VAL(%r0)
 	sacf	0
 	ltgr	%r2,%r2
 	jz	2f
-- 
cgit v1.2.3-59-g8ed1b


From c2e06e15ad92bad94b54df257c683f7e715238a1 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 12:08:44 +0100
Subject: s390: always inline disabled_wait

disabled_wait uses _THIS_IP_ and assumes that compiler would inline it.
Make sure this assumption is always correct by utilizing __always_inline.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/processor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 881fc37c11c6..361ef5eda468 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -310,7 +310,7 @@ void enabled_wait(void);
 /*
  * Function to drop a processor into disabled wait state
  */
-static inline void __noreturn disabled_wait(void)
+static __always_inline void __noreturn disabled_wait(void)
 {
 	psw_t psw;
 
-- 
cgit v1.2.3-59-g8ed1b


From 7f28dad395243c5026d649136823bbc40029a828 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 12:19:16 +0100
Subject: s390: disable preemption when switching to nodat stack with
 CALL_ON_STACK

Make sure preemption is disabled when temporarily switching to nodat
stack with CALL_ON_STACK helper, because nodat stack is per cpu.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/machine_kexec.c |  2 ++
 arch/s390/mm/maccess.c           | 12 +++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 444a19125a81..dcaadceaf6ef 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -164,7 +164,9 @@ static bool kdump_csum_valid(struct kimage *image)
 #ifdef CONFIG_CRASH_DUMP
 	int rc;
 
+	preempt_disable();
 	rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
+	preempt_enable();
 	return rc == 0;
 #else
 	return false;
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 59ad7997fed1..de7ca4b6718f 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -119,9 +119,15 @@ static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
  */
 int memcpy_real(void *dest, void *src, size_t count)
 {
-	if (S390_lowcore.nodat_stack != 0)
-		return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
-				     3, dest, src, count);
+	int rc;
+
+	if (S390_lowcore.nodat_stack != 0) {
+		preempt_disable();
+		rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3,
+				   dest, src, count);
+		preempt_enable();
+		return rc;
+	}
 	/*
 	 * This is a really early memcpy_real call, the stacks are
 	 * not set up yet. Just call _memcpy_real on the early boot
-- 
cgit v1.2.3-59-g8ed1b


From 103b4cca60d2c8c51f1290cc984b7046ccb8b46d Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 12:35:34 +0100
Subject: s390/unwind: unify task is current checks

Avoid mixture of task == NULL and task == current meaning the same
thing and simply always initialize task with current in unwind_start.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/stacktrace.h | 2 +-
 arch/s390/include/asm/unwind.h     | 3 ++-
 arch/s390/kernel/dumpstack.c       | 4 ----
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index fee40212af11..0ae4bbf7779c 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -38,7 +38,7 @@ static inline unsigned long get_stack_pointer(struct task_struct *task,
 {
 	if (regs)
 		return (unsigned long) kernel_stack_pointer(regs);
-	if (!task || task == current)
+	if (task == current)
 		return current_stack_pointer();
 	return (unsigned long) task->thread.ksp;
 }
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index eaaefeceef6f..a2d8dd766987 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -61,7 +61,8 @@ static inline void unwind_start(struct unwind_state *state,
 				struct pt_regs *regs,
 				unsigned long sp)
 {
-	sp = sp ? : get_stack_pointer(task, regs);
+	task = task ?: current;
+	sp = sp ?: get_stack_pointer(task, regs);
 	__unwind_start(state, task, regs, sp);
 }
 
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 34bdc60c0b11..fc442aec0d96 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -93,8 +93,6 @@ int get_stack_info(unsigned long sp, struct task_struct *task,
 	if (!sp)
 		goto unknown;
 
-	task = task ? : current;
-
 	/* Check per-task stack */
 	if (in_task_stack(sp, task, info))
 		goto recursion_check;
@@ -128,8 +126,6 @@ void show_stack(struct task_struct *task, unsigned long *stack)
 	struct unwind_state state;
 
 	printk("Call Trace:\n");
-	if (!task)
-		task = current;
 	unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
 		printk(state.reliable ? " [<%016lx>] %pSR \n" :
 					"([<%016lx>] %pSR)\n",
-- 
cgit v1.2.3-59-g8ed1b


From 7579425777c0d802237e0d59ae395e8cf60723e1 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 12:47:52 +0100
Subject: s390: correct CALL_ON_STACK back_chain saving

Currently CALL_ON_STACK saves r15 as back_chain in the first stack frame
of the stack we are about to switch to. But if a function which uses
CALL_ON_STACK calls another function, it allocates a stack frame for the
callee. In this case r15 is pointing to the callee stack frame and not
the stack frame of the function itself. This results in a dummy unwinding
entry with random sp and ip values.

Introduce and utilize current_frame_address macro to get an address of
actual function stack frame.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/stacktrace.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 0ae4bbf7779c..bb854e33e460 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -62,6 +62,17 @@ struct stack_frame {
 };
 #endif
 
+/*
+ * Unlike current_stack_pointer() which simply returns current value of %r15
+ * current_frame_address() returns function stack frame address, which matches
+ * %r15 upon function invocation. It may differ from %r15 later if function
+ * allocates stack for local variables or new stack frame to call other
+ * functions.
+ */
+#define current_frame_address()						\
+	((unsigned long)__builtin_frame_address(0) -			\
+	 offsetof(struct stack_frame, back_chain))
+
 #define CALL_ARGS_0()							\
 	register unsigned long r2 asm("2")
 #define CALL_ARGS_1(arg1)						\
@@ -95,18 +106,20 @@ struct stack_frame {
 
 #define CALL_ON_STACK(fn, stack, nr, args...)				\
 ({									\
+	unsigned long frame = current_frame_address();			\
 	CALL_ARGS_##nr(args);						\
 	unsigned long prev;						\
 									\
 	asm volatile(							\
 		"	la	%[_prev],0(15)\n"			\
 		"	la	15,0(%[_stack])\n"			\
-		"	stg	%[_prev],%[_bc](15)\n"			\
+		"	stg	%[_frame],%[_bc](15)\n"			\
 		"	brasl	14,%[_fn]\n"				\
 		"	la	15,0(%[_prev])\n"			\
 		: [_prev] "=&a" (prev), CALL_FMT_##nr			\
 		  [_stack] "a" (stack),					\
 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+		  [_frame] "d" (frame),					\
 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);			\
 	r2;								\
 })
-- 
cgit v1.2.3-59-g8ed1b


From 7bcaad1f9fac889f5fcd1a383acf7e00d006da41 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 13:12:57 +0100
Subject: s390: avoid misusing CALL_ON_STACK for task stack setup

CALL_ON_STACK is intended to be used for temporary stack switching with
potential return to the caller.

When CALL_ON_STACK is misused to switch from nodat stack to task stack
back_chain information would later lead stack unwinder from task stack into
(per cpu) nodat stack which is reused for other purposes. This would
yield confusing unwinding result or errors.

To avoid that introduce CALL_ON_STACK_NORETURN to be used instead. It
makes sure that back_chain is zeroed and unwinder finishes gracefully
ending up at task pt_regs.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/stacktrace.h | 11 +++++++++++
 arch/s390/kernel/setup.c           |  9 +--------
 arch/s390/kernel/smp.c             |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index bb854e33e460..4f3dd1c86c0d 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -124,4 +124,15 @@ struct stack_frame {
 	r2;								\
 })
 
+#define CALL_ON_STACK_NORETURN(fn, stack)				\
+({									\
+	asm volatile(							\
+		"	la	15,0(%[_stack])\n"			\
+		"	xc	%[_bc](8,15),%[_bc](15)\n"		\
+		"	brasl	14,%[_fn]\n"				\
+		::[_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+		  [_stack] "a" (stack), [_fn] "X" (fn));		\
+	BUG();								\
+})
+
 #endif /* _ASM_S390_STACKTRACE_H */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3ff291bc63b7..9cbf490fd162 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -355,7 +355,6 @@ early_initcall(async_stack_realloc);
 
 void __init arch_call_rest_init(void)
 {
-	struct stack_frame *frame;
 	unsigned long stack;
 
 	stack = stack_alloc();
@@ -368,13 +367,7 @@ void __init arch_call_rest_init(void)
 	set_task_stack_end_magic(current);
 	stack += STACK_INIT_OFFSET;
 	S390_lowcore.kernel_stack = stack;
-	frame = (struct stack_frame *) stack;
-	memset(frame, 0, sizeof(*frame));
-	/* Branch to rest_init on the new stack, never returns */
-	asm volatile(
-		"	la	15,0(%[_frame])\n"
-		"	jg	rest_init\n"
-		: : [_frame] "a" (frame));
+	CALL_ON_STACK_NORETURN(rest_init, stack);
 }
 
 static void __init setup_lowcore_dat_off(void)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 06dddd7c4290..2794cad9312e 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -876,7 +876,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
 	S390_lowcore.restart_source = -1UL;
 	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
 	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
-	CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0);
+	CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack);
 }
 
 /* Upping and downing of CPUs */
-- 
cgit v1.2.3-59-g8ed1b


From 67f5593419878798bb306632cdca0698a2dd3cbd Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 15:53:30 +0100
Subject: s390/unwind: report an error if pt_regs are not on stack

If unwinder is looking at pt_regs which is not on stack then something
went wrong and an error has to be reported rather than successful
unwinding termination.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/unwind_bc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index fa111d3d378f..fd90b6e21663 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -76,7 +76,7 @@ bool unwind_next_frame(struct unwind_state *state)
 			/* No back-chain, look for a pt_regs structure */
 			sp = state->sp + STACK_FRAME_OVERHEAD;
 			if (!on_stack(info, sp, sizeof(struct pt_regs)))
-				goto out_stop;
+				goto out_err;
 			regs = (struct pt_regs *) sp;
 			if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
 				goto out_stop;
-- 
cgit v1.2.3-59-g8ed1b


From 97806dfb6f3838ee4b7bc69e6f160d83eadbc74a Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 15:58:42 +0100
Subject: s390/unwind: make reuse_sp default when unwinding pt_regs

Currently unwinder yields 2 entries when pt_regs are met:
sp="address of pt_regs itself" ip=pt_regs->psw
sp=pt_regs->gprs[15] ip="r14 from stack frame pointed by pt_regs->gprs[15]"

And neither of those 2 states (combination of sp and ip) ever happened.

reuse_sp has been introduced by commit a1d863ac3e10 ("s390/unwind: fix
mixing regs and sp"). reuse_sp=true makes unwinder keen to produce the
following result, when pt_regs are given (as an arg to unwind_start):
sp=pt_regs->gprs[15] ip=pt_regs->psw
sp=pt_regs->gprs[15] ip="r14 from stack frame pointed by pt_regs->gprs[15]"

The first state is an actual state in which a task was when pt_regs were
collected. The second state is marked unreliable and is for debugging
purposes to cover the case when a task has been interrupted in between
stack frame allocation and writing back_chain - in this case r14 might
show an actual caller.

Make unwinder behaviour enabled via reuse_sp=true default and drop the
special case handling.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/unwind.h |  1 -
 arch/s390/kernel/unwind_bc.c   | 21 +++++++--------------
 2 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index a2d8dd766987..5d6c8fe7a271 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -35,7 +35,6 @@ struct unwind_state {
 	struct task_struct *task;
 	struct pt_regs *regs;
 	unsigned long sp, ip;
-	bool reuse_sp;
 	int graph_idx;
 	bool reliable;
 	bool error;
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index fd90b6e21663..ac6cfab567d1 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -46,16 +46,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	regs = state->regs;
 	if (unlikely(regs)) {
-		if (state->reuse_sp) {
-			sp = state->sp;
-			state->reuse_sp = false;
-		} else {
-			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
-			if (unlikely(outside_of_stack(state, sp))) {
-				if (!update_stack_info(state, sp))
-					goto out_err;
-			}
-		}
+		sp = state->sp;
 		sf = (struct stack_frame *) sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
 		reliable = false;
@@ -81,6 +72,11 @@ bool unwind_next_frame(struct unwind_state *state)
 			if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
 				goto out_stop;
 			ip = READ_ONCE_NOCHECK(regs->psw.addr);
+			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+			if (unlikely(outside_of_stack(state, sp))) {
+				if (!update_stack_info(state, sp))
+					goto out_err;
+			}
 			reliable = true;
 		}
 	}
@@ -107,7 +103,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 {
 	struct stack_info *info = &state->stack_info;
 	unsigned long *mask = &state->stack_mask;
-	bool reliable, reuse_sp;
+	bool reliable;
 	struct stack_frame *sf;
 	unsigned long ip;
 
@@ -134,12 +130,10 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	if (regs) {
 		ip = READ_ONCE_NOCHECK(regs->psw.addr);
 		reliable = true;
-		reuse_sp = true;
 	} else {
 		sf = (struct stack_frame *) sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
 		reliable = false;
-		reuse_sp = false;
 	}
 
 	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
@@ -148,6 +142,5 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	state->sp = sp;
 	state->ip = ip;
 	state->reliable = reliable;
-	state->reuse_sp = reuse_sp;
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
-- 
cgit v1.2.3-59-g8ed1b


From cb7948e8c3f18f7ff0ab7d0fa1e6b108d938cdd6 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 17:15:35 +0100
Subject: s390/head64: correct init_task stack setup

Add missing allocation of pt_regs at the bottom of the stack. This
makes it consistent with other stack setup cases and also what stack
unwinder expects.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/head64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index b9e585f528a6..8b88dbbda7df 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -31,7 +31,7 @@ ENTRY(startup_continue)
 #
 	larl	%r14,init_task
 	stg	%r14,__LC_CURRENT
-	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
+	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
 #ifdef CONFIG_KASAN
 	brasl	%r14,kasan_early_init
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From e76e69611e944ecc38aaf8fe3a7bebdc3c5daf84 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 16:49:13 +0100
Subject: s390/unwind: stop gracefully at task pt_regs

Consider reaching task pt_regs graceful unwinder termination. Task
pt_regs itself never contains a valid state to which a task might return
within the kernel context (user task pt_regs is a special case). Since
we already avoid printing user task pt_regs and in most cases we don't
even bother filling task pt_regs psw and r15 with something reasonable
simply skip task pt_regs altogether. With this change unwind_error() now
accurately represent whether unwinder reached task pt_regs successfully
or failed along the way.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/unwind_bc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index ac6cfab567d1..c5ebb8a4cdd6 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -36,6 +36,12 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp)
 	return true;
 }
 
+static inline bool is_task_pt_regs(struct unwind_state *state,
+				   struct pt_regs *regs)
+{
+	return task_pt_regs(state->task) == regs;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
 	struct stack_info *info = &state->stack_info;
@@ -69,7 +75,7 @@ bool unwind_next_frame(struct unwind_state *state)
 			if (!on_stack(info, sp, sizeof(struct pt_regs)))
 				goto out_err;
 			regs = (struct pt_regs *) sp;
-			if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
+			if (is_task_pt_regs(state, regs))
 				goto out_stop;
 			ip = READ_ONCE_NOCHECK(regs->psw.addr);
 			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
-- 
cgit v1.2.3-59-g8ed1b


From a9f2f6865d784477e1c7b59269d3a384abafd9ca Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Tue, 19 Nov 2019 12:30:53 +0100
Subject: s390/kaslr: store KASLR offset for early dumps

The KASLR offset is added to vmcoreinfo in arch_crash_save_vmcoreinfo(),
so that it can be found by crash when processing kernel dumps.

However, arch_crash_save_vmcoreinfo() is called during a subsys_initcall,
so if the kernel crashes before that, we have no vmcoreinfo and no KASLR
offset.

Fix this by storing the KASLR offset in the lowcore, where the vmcore_info
pointer will be stored, and where it can be found by crash. In order to
make it distinguishable from a real vmcore_info pointer, mark it as uneven
(KASLR offset itself is aligned to THREAD_SIZE).

When arch_crash_save_vmcoreinfo() stores the real vmcore_info pointer in
the lowcore, it overwrites the KASLR offset. At that point, the KASLR
offset is not yet added to vmcoreinfo, so we also need to move the
mem_assign_absolute() behind the vmcoreinfo_append_str().

Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)")
Cc: <stable@vger.kernel.org> # v5.2+
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/boot/startup.c         | 5 +++++
 arch/s390/kernel/machine_kexec.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index fbd341ea03b8..3b3a11f95269 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -170,6 +170,11 @@ void startup_kernel(void)
 		handle_relocs(__kaslr_offset);
 
 	if (__kaslr_offset) {
+		/*
+		 * Save KASLR offset for early dumps, before vmcore_info is set.
+		 * Mark as uneven to distinguish from real vmcore_info pointer.
+		 */
+		S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
 		/* Clear non-relocated kernel */
 		if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
 			memset(img, 0, vmlinux.image_size);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index dcaadceaf6ef..cb8b1cc285c9 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -256,10 +256,10 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 	vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
 	vmcoreinfo_append_str("EDMA=%lx\n", __edma);
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)
-- 
cgit v1.2.3-59-g8ed1b


From 532da3de70b207be2b98cd5fb966e3915c8872c3 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Thu, 21 Nov 2019 15:46:02 +0100
Subject: s390/cpum_sf: Replace function name in debug statements

Replace hard coded function names in debug statements
by the "%s ...", __func__ construct suggested by checkpatch.pl
script.  Use consistent debug print format of the form variable
blank value. Also add leading 0x for all hex values.
Print allocated page addresses consistently as hex numbers
with leading 0x.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/cpu_mf.h  |   2 +-
 arch/s390/kernel/perf_cpum_sf.c | 107 +++++++++++++++++++++-------------------
 2 files changed, 57 insertions(+), 52 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 819803a97c2b..0d90cbeb89b4 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -313,7 +313,7 @@ static inline unsigned long *trailer_entry_ptr(unsigned long v)
 	return (unsigned long *) ret;
 }
 
-/* Return if the entry in the sample data block table (sdbt)
+/* Return true if the entry in the sample data block table (sdbt)
  * is a link to the next sdbt */
 static inline int is_link_entry(unsigned long *s)
 {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 69506fdbd9a1..4414094550a4 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
 		}
 	}
 
-	debug_sprintf_event(sfdbg, 5, "%s freed sdbt %p\n", __func__,
-			    sfb->sdbt);
+	debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
+			    (unsigned long)sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
@@ -213,9 +213,10 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	 */
 	if (sfb->sdbt != get_next_sdbt(tail)) {
 		debug_sprintf_event(sfdbg, 3, "%s: "
-				    "sampling buffer is not linked: origin %p"
-				    " tail %p\n", __func__,
-				    (void *)sfb->sdbt, (void *)tail);
+				    "sampling buffer is not linked: origin %#lx"
+				    " tail %#lx\n", __func__,
+				    (unsigned long)sfb->sdbt,
+				    (unsigned long)tail);
 		return -EINVAL;
 	}
 
@@ -251,8 +252,8 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 	*tail = (unsigned long) sfb->sdbt + 1;
 	sfb->tail = tail;
 
-	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
-			    " settings: sdbt %lu sdb %lu\n",
+	debug_sprintf_event(sfdbg, 4, "%s: new buffer"
+			    " settings: sdbt %lu sdb %lu\n", __func__,
 			    sfb->num_sdbt, sfb->num_sdb);
 	return rc;
 }
@@ -292,12 +293,13 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
 	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
 	if (rc) {
 		free_sampling_buffer(sfb);
-		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
-			"realloc_sampling_buffer failed with rc %i\n", rc);
+		debug_sprintf_event(sfdbg, 4, "%s: "
+			"realloc_sampling_buffer failed with rc %i\n",
+			__func__, rc);
 	} else
 		debug_sprintf_event(sfdbg, 4,
-			"alloc_sampling_buffer: tear %p dear %p\n",
-			sfb->sdbt, (void *)*sfb->sdbt);
+			"%s: tear %#lx dear %#lx\n", __func__,
+			(unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
 	return rc;
 }
 
@@ -465,8 +467,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
 	if (num)
 		sfb_account_allocs(num, hwc);
 
-	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow %llu ratio %lu"
-			    " num %lu\n", OVERFLOW_REG(hwc), ratio, num);
+	debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
+			    __func__, OVERFLOW_REG(hwc), ratio, num);
 	OVERFLOW_REG(hwc) = 0;
 }
 
@@ -504,13 +506,13 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
 	 */
 	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
 	if (rc)
-		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
-				    "failed with rc %i\n", rc);
+		debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
+				    __func__, rc);
 
 	if (sfb_has_pending_allocs(sfb, hwc))
-		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+		debug_sprintf_event(sfdbg, 5, "%s: "
 				    "req %lu alloc %lu remaining %lu\n",
-				    num, sfb->num_sdb - num_old,
+				    __func__, num, sfb->num_sdb - num_old,
 				    sfb_pending_allocs(sfb, hwc));
 }
 
@@ -698,9 +700,9 @@ static unsigned long getrate(bool freq, unsigned long sample,
 		 */
 		if (sample_rate_to_freq(si, rate) >
 		    sysctl_perf_event_sample_rate) {
-			debug_sprintf_event(sfdbg, 1,
+			debug_sprintf_event(sfdbg, 1, "%s: "
 					    "Sampling rate exceeds maximum "
-					    "perf sample rate\n");
+					    "perf sample rate\n", __func__);
 			rate = 0;
 		}
 	}
@@ -745,10 +747,9 @@ static int __hw_perf_event_init_rate(struct perf_event *event,
 	attr->sample_period = rate;
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
-	debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
-			    "cpu:%d period:%#llx freq:%d,%#lx\n", event->cpu,
-			    event->attr.sample_period, event->attr.freq,
-			    SAMPLE_FREQ_MODE(hwc));
+	debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
+			    __func__, event->cpu, event->attr.sample_period,
+			    event->attr.freq, SAMPLE_FREQ_MODE(hwc));
 	return 0;
 }
 
@@ -973,12 +974,11 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 	/* Load current program parameter */
 	lpp(&S390_lowcore.lpp);
 
-	debug_sprintf_event(sfdbg, 6, "pmu_enable: es %i cs %i ed %i cd %i "
-			    "interval %#lx tear %p dear %p\n",
+	debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
+			    "interval %#lx tear %#lx dear %#lx\n", __func__,
 			    cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
 			    cpuhw->lsctl.cd, cpuhw->lsctl.interval,
-			    (void *) cpuhw->lsctl.tear,
-			    (void *) cpuhw->lsctl.dear);
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
 }
 
 static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1019,8 +1019,8 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 			cpuhw->lsctl.dear = si.dear;
 		}
 	} else
-		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
-				    "qsi() failed with err %i\n", err);
+		debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
+				    __func__, err);
 
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
@@ -1265,9 +1265,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 			sampl_overflow += te->overflow;
 
 		/* Timestamps are valid for full sample-data-blocks only */
-		debug_sprintf_event(sfdbg, 6, "%s: sdbt %p "
+		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
 				    "overflow %llu timestamp %#llx\n",
-				    __func__, sdbt, te->overflow,
+				    __func__, (unsigned long)sdbt, te->overflow,
 				    (te->f) ? trailer_timestamp(te) : 0ULL);
 
 		/* Collect all samples from a single sample-data-block and
@@ -1312,8 +1312,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 						 sampl_overflow, 1 + num_sdb);
 	if (sampl_overflow || event_overflow)
 		debug_sprintf_event(sfdbg, 4, "%s: "
-				    "overflow stats: sample %llu event %llu\n",
-				    __func__, sampl_overflow, event_overflow);
+				    "overflows: sample %llu event %llu"
+				    " total %llu num_sdb %llu\n",
+				    __func__, sampl_overflow, event_overflow,
+				    OVERFLOW_REG(hwc), num_sdb);
 }
 
 #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
@@ -1424,10 +1426,10 @@ static int aux_output_begin(struct perf_output_handle *handle,
 	cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
 	cpuhw->lsctl.dear = aux->sdb_index[head];
 
-	debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
+	debug_sprintf_event(sfdbg, 6, "%s: "
 			    "head->alert_mark->empty_mark (num_alert, range)"
 			    "[%#lx -> %#lx -> %#lx] (%#lx, %#lx) "
-			    "tear index %#lx, tear %#lx dear %#lx\n",
+			    "tear index %#lx, tear %#lx dear %#lx\n", __func__,
 			    aux->head, aux->alert_mark, aux->empty_mark,
 			    AUX_SDB_NUM_ALERT(aux), range,
 			    head / CPUM_SF_SDB_PER_TABLE,
@@ -1571,7 +1573,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			pr_err("The AUX buffer with %lu pages for the "
 			       "diagnostic-sampling mode is full\n",
 				num_sdb);
-			debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+			debug_sprintf_event(sfdbg, 1,
+					    "%s: AUX buffer used up\n",
+					    __func__);
 			break;
 		}
 		if (WARN_ON_ONCE(!aux))
@@ -1594,23 +1598,25 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
 			perf_aux_output_end(&cpuhw->handle, size);
 			pr_err("Sample data caused the AUX buffer with %lu "
 			       "pages to overflow\n", num_sdb);
-			debug_sprintf_event(sfdbg, 1, "head %#lx range %#lx "
-					    "overflow %#llx\n",
+			debug_sprintf_event(sfdbg, 1, "%s: head %#lx range %#lx "
+					    "overflow %#llx\n", __func__,
 					    aux->head, range, overflow);
 		} else {
 			size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
-			debug_sprintf_event(sfdbg, 6, "head %#lx alert %#lx "
+			debug_sprintf_event(sfdbg, 6, "%s: head %#lx alert %#lx "
 					    "already full, try another\n",
+					    __func__,
 					    aux->head, aux->alert_mark);
 		}
 	}
 
 	if (done)
-		debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
+		debug_sprintf_event(sfdbg, 6, "%s: aux_reset_buffer "
 				    "[%#lx -> %#lx -> %#lx] (%#lx, %#lx)\n",
-				    aux->head, aux->alert_mark, aux->empty_mark,
-				    AUX_SDB_NUM_ALERT(aux), range);
+				    __func__, aux->head, aux->alert_mark,
+				    aux->empty_mark, AUX_SDB_NUM_ALERT(aux),
+				    range);
 }
 
 /*
@@ -1633,8 +1639,8 @@ static void aux_buffer_free(void *data)
 	kfree(aux->sdb_index);
 	kfree(aux);
 
-	debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
-			    "%lu SDBTs\n", num_sdbt);
+	debug_sprintf_event(sfdbg, 4, "%s: free "
+			    "%lu SDBTs\n", __func__, num_sdbt);
 }
 
 static void aux_sdb_init(unsigned long sdb)
@@ -1742,9 +1748,8 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
 	 */
 	aux->empty_mark = sfb->num_sdb - 1;
 
-	debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
-			    " and %lu SDBs\n",
-			    sfb->num_sdbt, sfb->num_sdb);
+	debug_sprintf_event(sfdbg, 4, "%s: setup %lu SDBTs and %lu SDBs\n",
+			    __func__, sfb->num_sdbt, sfb->num_sdb);
 
 	return aux;
 
@@ -1797,9 +1802,9 @@ static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
 	event->attr.sample_period = rate;
 	SAMPL_RATE(&event->hw) = rate;
 	hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
-	debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
-			    "cpu:%d value:%#llx period:%#llx freq:%d\n",
-			    event->cpu, value,
+	debug_sprintf_event(sfdbg, 4, "%s:"
+			    " cpu %d value %#llx period %#llx freq %d\n",
+			    __func__, event->cpu, value,
 			    event->attr.sample_period, do_freq);
 	return 0;
 }
@@ -2030,7 +2035,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 
 	/* Report measurement alerts only for non-PRA codes */
 	if (alert != CPU_MF_INT_SF_PRA)
-		debug_sprintf_event(sfdbg, 6, "measurement alert: %#x\n",
+		debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
 				    alert);
 
 	/* Sampling authorization change request */
-- 
cgit v1.2.3-59-g8ed1b


From c17a7c6ee8177e0da998784c06f37fc093507c5b Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Fri, 22 Nov 2019 13:42:55 +0100
Subject: s390/cpum_sf: Remove unnecessary check for pending SDBs

In interrupt handling the function extend_sampling_buffer()
is called after checking for a possible extension.
This check is not necessary as the called function itself
performs this check again.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 4414094550a4..dc0ac098a465 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -952,8 +952,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
 			 * buffer extents
 			 */
 			sfb_account_overflows(cpuhw, hwc);
-			if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
-				extend_sampling_buffer(&cpuhw->sfb, hwc);
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
 		}
 		/* Rate may be adjusted with ioctl() */
 		cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
-- 
cgit v1.2.3-59-g8ed1b


From 7dd6b199df46e871e6e0d0cd7e4f71dc07dfd53c Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Fri, 22 Nov 2019 15:29:54 +0100
Subject: s390/cpum_sf: Use TEAR_REG macro consistantly

The macro TEAR_REG() saves the last used SDBT address
in the perf_hw_event structure. This is also done
by function hw_reset_registers() which is a one-liner
and simply uses macro TEAR_REG(). Remove function
hw_reset_registers(), which is only used one time and use
macro TEAR_REG() instead. This macro is used throughout
the code anyway.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index dc0ac098a465..cd9fb45eebd2 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -602,13 +602,6 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
 	local64_set(&hwc->period_left, hwc->sample_period);
 }
 
-static void hw_reset_registers(struct hw_perf_event *hwc,
-			       unsigned long *sdbt_origin)
-{
-	/* (Re)set to first sample-data-block-table */
-	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
-}
-
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
 				   unsigned long rate)
 {
@@ -1879,7 +1872,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
 	if (!SAMPL_DIAG_MODE(&event->hw)) {
 		cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
 		cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
-		hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+		TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
 	}
 
 	/* Ensure sampling functions are in the disabled state.  If disabled,
-- 
cgit v1.2.3-59-g8ed1b


From 247f265fa502e7b17a0cb0cc330e055a36aafce4 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Fri, 22 Nov 2019 16:43:15 +0100
Subject: s390/cpum_sf: Check for SDBT and SDB consistency

Each SDBT is located at a 4KB page and contains 512 entries.
Each entry of a SDBT points to a SDB, a 4KB page containing
sampled data. The last entry is a link to another SDBT page.

When an event is created the function sequence executed is:

  __hw_perf_event_init()
  +--> allocate_buffers()
       +--> realloc_sampling_buffer()
	    +---> alloc_sample_data_block()

Both functions realloc_sampling_buffer() and
alloc_sample_data_block() allocate pages and the allocation
can fail. This is handled correctly and all allocated
pages are freed and error -ENOMEM is returned to the
top calling function. Finally the event is not created.

Once the event has been created, the amount of initially
allocated SDBT and SDB can be too low. This is detected
during measurement interrupt handling, where the amount
of lost samples is calculated. If the number of lost samples
is too high considering sampling frequency and already allocated
SDBs, the number of SDBs is enlarged during the next execution
of cpumsf_pmu_enable().

If more SDBs need to be allocated, functions

       realloc_sampling_buffer()
       +---> alloc_sample_data_block()

are called to allocate more pages. Page allocation may fail
and the returned error is ignored. A SDBT and SDB setup
already exists.

However the modified SDBTs and SDBs might end up in a situation
where the first entry of an SDBT does not point to an SDB,
but another SDBT, basically an SDBT without payload.
This cannot be handled by the interrupt handler, where an SDBT
must have at least one entry pointing to an SDB.

Add a check to avoid SDBTs without payload (SDBs) when enlarging
the buffer setup.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index cd9fb45eebd2..c07fdcd73726 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 				   unsigned long num_sdb, gfp_t gfp_flags)
 {
 	int i, rc;
-	unsigned long *new, *tail;
+	unsigned long *new, *tail, *tail_prev = NULL;
 
 	if (!sfb->sdbt || !sfb->tail)
 		return -EINVAL;
@@ -233,6 +233,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 			sfb->num_sdbt++;
 			/* Link current page to tail of chain */
 			*tail = (unsigned long)(void *) new + 1;
+			tail_prev = tail;
 			tail = new;
 		}
 
@@ -242,10 +243,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
 		 * issue, a new realloc call (if required) might succeed.
 		 */
 		rc = alloc_sample_data_block(tail, gfp_flags);
-		if (rc)
+		if (rc) {
+			/* Undo last SDBT. An SDBT with no SDB at its first
+			 * entry but with an SDBT entry instead can not be
+			 * handled by the interrupt handler code.
+			 * Avoid this situation.
+			 */
+			if (tail_prev) {
+				sfb->num_sdbt--;
+				free_page((unsigned long) new);
+				tail = tail_prev;
+			}
 			break;
+		}
 		sfb->num_sdb++;
 		tail++;
+		tail_prev = new = NULL;	/* Allocated at least one SBD */
 	}
 
 	/* Link sampling buffer to its origin */
-- 
cgit v1.2.3-59-g8ed1b


From 794b8846dcdc0c6e23bbf4e5283415cab0caa9ac Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Thu, 28 Nov 2019 09:30:00 +0100
Subject: s390/pci: add error message for UID collision

When UID checking was turned off during runtime in the underlying
hypervisor, a PCI device may be attached with the same UID. This is
already detected but happens silently. Add an error message so it can
more easily be understood why a device was not added.

Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/pci/pci.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index c7fea9bea8cb..4901f5d1c479 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/jump_label.h>
 #include <linux/pci.h>
+#include <linux/printk.h>
 
 #include <asm/isc.h>
 #include <asm/airq.h>
@@ -659,6 +660,8 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
 		spin_lock(&zpci_domain_lock);
 		if (test_bit(zdev->domain, zpci_domain)) {
 			spin_unlock(&zpci_domain_lock);
+			pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n",
+				zdev->fid, zdev->domain);
 			return -EEXIST;
 		}
 		set_bit(zdev->domain, zpci_domain);
-- 
cgit v1.2.3-59-g8ed1b


From d497b7ec836d2c900993f1c43b2ddff5f8a6b129 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Thu, 28 Nov 2019 09:31:52 +0100
Subject: s390/pci: add error message on device number limit

The config option CONFIG_PCI_NR_FUNCTIONS sets a limit on the number of
PCI functions we can support. Previously on reaching this limit there
was no indication why newly attached devices are not recognized by Linux
which could be quite confusing. Thus this patch adds a pr_err() for this
case.

Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/pci/pci.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 4901f5d1c479..2e377f2b7b6d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -673,6 +673,8 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
 	zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
 	if (zdev->domain == ZPCI_NR_DEVICES) {
 		spin_unlock(&zpci_domain_lock);
+		pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+			zdev->fid, ZPCI_NR_DEVICES);
 		return -ENOSPC;
 	}
 	set_bit(zdev->domain, zpci_domain);
-- 
cgit v1.2.3-59-g8ed1b


From adcfb8cdc910bdd0b5d52d2ba88103af93dc43d3 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Tue, 26 Nov 2019 17:40:04 +0100
Subject: s390/unwind: always inline get_stack_pointer

Always inline get_stack_pointer() to avoid potential problems
due to compiler inlining decisions, i.e. getting stack pointer of
get_stack_pointer() itself which is later reused.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/stacktrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 4f3dd1c86c0d..4725315a9cb1 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -33,8 +33,8 @@ static inline bool on_stack(struct stack_info *info,
 	return addr >= info->begin && addr + len <= info->end;
 }
 
-static inline unsigned long get_stack_pointer(struct task_struct *task,
-					      struct pt_regs *regs)
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+						       struct pt_regs *regs)
 {
 	if (regs)
 		return (unsigned long) kernel_stack_pointer(regs);
-- 
cgit v1.2.3-59-g8ed1b


From badbf39790798283f2424828e7b7bec3962f1e02 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 17 Oct 2019 15:09:08 +0200
Subject: s390/unwind: add a test for the internal API

unwind_for_each_frame can take at least 8 different sets of parameters.
Add a test to make sure they all are handled in a sane way.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Co-developed-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/Kconfig           |  14 +++
 arch/s390/lib/Makefile      |   3 +
 arch/s390/lib/test_unwind.c | 231 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 248 insertions(+)
 create mode 100644 arch/s390/lib/test_unwind.c

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f0df9e48e651..2528eb9d01fb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -1018,3 +1018,17 @@ config S390_GUEST
 	  the KVM hypervisor.
 
 endmenu
+
+menu "Selftests"
+
+config S390_UNWIND_SELFTEST
+	def_tristate n
+	prompt "Test unwind functions"
+	help
+	  This option enables s390 specific stack unwinder testing kernel
+	  module. This option is not useful for distributions or general
+	  kernels, but only for kernel developers working on architecture code.
+
+	  Say N if you are unsure.
+
+endmenu
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index d7c218e8b559..28fd66d558ff 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -11,3 +11,6 @@ lib-$(CONFIG_UPROBES) += probes.o
 # Instrumenting memory accesses to __user data (in different address space)
 # produce false positives
 KASAN_SANITIZE_uaccess.o := n
+
+obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
+CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
new file mode 100644
index 000000000000..5636da941f1f
--- /dev/null
+++ b/arch/s390/lib/test_unwind.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test module for unwind_for_each_frame
+ */
+
+#define pr_fmt(fmt) "test_unwind: " fmt
+#include <asm/unwind.h>
+#include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/wait.h>
+
+#define BT_BUF_SIZE (PAGE_SIZE * 4)
+
+/*
+ * To avoid printk line limit split backtrace by lines
+ */
+static void print_backtrace(char *bt)
+{
+	char *p;
+
+	while (true) {
+		p = strsep(&bt, "\n");
+		if (!p)
+			break;
+		pr_err("%s\n", p);
+	}
+}
+
+/*
+ * Calls unwind_for_each_frame(task, regs, sp) and verifies that the result
+ * contains unwindme_func2 followed by unwindme_func1.
+ */
+static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
+				unsigned long sp)
+{
+	int frame_count, prev_is_func2, seen_func2_func1;
+	const int max_frames = 128;
+	struct unwind_state state;
+	size_t bt_pos = 0;
+	int ret = 0;
+	char *bt;
+
+	bt = kmalloc(BT_BUF_SIZE, GFP_KERNEL);
+	if (!bt) {
+		pr_err("failed to allocate backtrace buffer\n");
+		return -ENOMEM;
+	}
+	/* Unwind. */
+	frame_count = 0;
+	prev_is_func2 = 0;
+	seen_func2_func1 = 0;
+	unwind_for_each_frame(&state, task, regs, sp) {
+		unsigned long addr = unwind_get_return_address(&state);
+		char sym[KSYM_SYMBOL_LEN];
+
+		if (!addr || frame_count == max_frames)
+			break;
+		sprint_symbol(sym, addr);
+		if (bt_pos < BT_BUF_SIZE) {
+			bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos, "%s\n", sym);
+			if (bt_pos >= BT_BUF_SIZE)
+				pr_err("backtrace buffer is too small\n");
+		}
+		frame_count += 1;
+		if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
+			seen_func2_func1 = 1;
+		prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
+	}
+
+	/* Check the results. */
+	if (!seen_func2_func1) {
+		pr_err("unwindme_func2 and unwindme_func1 not found\n");
+		ret = -EINVAL;
+	}
+	if (frame_count == max_frames) {
+		pr_err("Maximum number of frames exceeded\n");
+		ret = -EINVAL;
+	}
+	if (ret)
+		print_backtrace(bt);
+	kfree(bt);
+	return ret;
+}
+
+/* State of the task being unwound. */
+struct unwindme {
+	int flags;
+	struct completion task_ready;
+	wait_queue_head_t task_wq;
+	unsigned long sp;
+};
+
+/* Values of unwindme.flags. */
+#define UWM_DEFAULT	0x0
+#define UWM_THREAD	0x1	/* Unwind a separate task. */
+#define UWM_REGS	0x2	/* Pass regs to test_unwind(). */
+#define UWM_SP		0x4	/* Pass sp to test_unwind(). */
+#define UWM_CALLER	0x8	/* Unwind starting from caller. */
+
+static __always_inline unsigned long get_psw_addr(void)
+{
+	unsigned long psw_addr;
+
+	asm volatile(
+		"basr	%[psw_addr],0\n"
+		: [psw_addr] "=d" (psw_addr));
+	return psw_addr;
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func4(struct unwindme *u)
+{
+	if (!(u->flags & UWM_CALLER))
+		u->sp = current_frame_address();
+	if (u->flags & UWM_THREAD) {
+		complete(&u->task_ready);
+		wait_event(u->task_wq, kthread_should_park());
+		kthread_parkme();
+		return 0;
+	} else {
+		struct pt_regs regs;
+
+		memset(&regs, 0, sizeof(regs));
+		regs.psw.addr = get_psw_addr();
+		regs.gprs[15] = current_stack_pointer();
+		return test_unwind(NULL,
+				   (u->flags & UWM_REGS) ? &regs : NULL,
+				   (u->flags & UWM_SP) ? u->sp : 0);
+	}
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func3(struct unwindme *u)
+{
+	u->sp = current_frame_address();
+	return unwindme_func4(u);
+}
+
+/* This function must appear in the backtrace. */
+static noinline int unwindme_func2(struct unwindme *u)
+{
+	return unwindme_func3(u);
+}
+
+/* This function must follow unwindme_func2 in the backtrace. */
+static noinline int unwindme_func1(void *u)
+{
+	return unwindme_func2((struct unwindme *)u);
+}
+
+/* Spawns a task and passes it to test_unwind(). */
+static int test_unwind_task(struct unwindme *u)
+{
+	struct task_struct *task;
+	int ret;
+
+	/* Initialize thread-related fields. */
+	init_completion(&u->task_ready);
+	init_waitqueue_head(&u->task_wq);
+
+	/*
+	 * Start the task and wait until it reaches unwindme_func4() and sleeps
+	 * in (task_ready, unwind_done] range.
+	 */
+	task = kthread_run(unwindme_func1, u, "%s", __func__);
+	if (IS_ERR(task)) {
+		pr_err("kthread_run() failed\n");
+		return PTR_ERR(task);
+	}
+	/*
+	 * Make sure task reaches unwindme_func4 before parking it,
+	 * we might park it before kthread function has been executed otherwise
+	 */
+	wait_for_completion(&u->task_ready);
+	kthread_park(task);
+	/* Unwind. */
+	ret = test_unwind(task, NULL, (u->flags & UWM_SP) ? u->sp : 0);
+	kthread_stop(task);
+	return ret;
+}
+
+static int test_unwind_flags(int flags)
+{
+	struct unwindme u;
+
+	u.flags = flags;
+	if (u.flags & UWM_THREAD)
+		return test_unwind_task(&u);
+	else
+		return unwindme_func1(&u);
+}
+
+static int test_unwind_init(void)
+{
+	int ret = 0;
+
+#define TEST(flags)							\
+do {									\
+	pr_info("[ RUN      ] " #flags "\n");				\
+	if (!test_unwind_flags((flags))) {				\
+		pr_info("[       OK ] " #flags "\n");			\
+	} else {							\
+		pr_err("[  FAILED  ] " #flags "\n");			\
+		ret = -EINVAL;						\
+	}								\
+} while (0)
+
+	TEST(UWM_DEFAULT);
+	TEST(UWM_SP);
+	TEST(UWM_REGS);
+	TEST(UWM_SP | UWM_REGS);
+	TEST(UWM_CALLER | UWM_SP);
+	TEST(UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_THREAD);
+	TEST(UWM_THREAD | UWM_SP);
+	TEST(UWM_THREAD | UWM_CALLER | UWM_SP);
+#undef TEST
+
+	return ret;
+}
+
+static void test_unwind_exit(void)
+{
+}
+
+module_init(test_unwind_init);
+module_exit(test_unwind_exit);
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-59-g8ed1b


From f44fa79b104b56d53d33ae43e69bab98b63d4783 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 17:37:50 +0100
Subject: s390/test_unwind: require that unwinding ended successfully

Currently unwinder test passes if unwinding results contain unwindme_func2
and unwindme_func1 functions.
Now that unwinder reports success upon reaching task pt_regs, check
that unwinding ended successfully in every test.

Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/lib/test_unwind.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 5636da941f1f..2839f8cb691d 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -71,6 +71,10 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 	}
 
 	/* Check the results. */
+	if (unwind_error(&state)) {
+		pr_err("unwind error\n");
+		ret = -EINVAL;
+	}
 	if (!seen_func2_func1) {
 		pr_err("unwindme_func2 and unwindme_func1 not found\n");
 		ret = -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From 4ac24c092b4eef69b2436ee4d478500dc886e8b5 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Mon, 25 Nov 2019 13:34:59 +0100
Subject: s390: fix register clobbering in CALL_ON_STACK

CALL_ON_STACK defines and initializes register variables. Inline
assembly which follows might trigger compiler to generate memory access
for "stack" argument (e.g. in case of S390_lowcore.nodat_stack). This
memory access produces a function call under kasan with outline
instrumentation which clobbers registers.

Switch "stack" argument in CALL_ON_STACK helper to use memory reference
constraint and perform load instead.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/stacktrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 4725315a9cb1..ee056f4a4fa3 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -112,12 +112,12 @@ struct stack_frame {
 									\
 	asm volatile(							\
 		"	la	%[_prev],0(15)\n"			\
-		"	la	15,0(%[_stack])\n"			\
+		"	lg	15,%[_stack]\n"				\
 		"	stg	%[_frame],%[_bc](15)\n"			\
 		"	brasl	14,%[_fn]\n"				\
 		"	la	15,0(%[_prev])\n"			\
 		: [_prev] "=&a" (prev), CALL_FMT_##nr			\
-		  [_stack] "a" (stack),					\
+		  [_stack] "R" (stack),					\
 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
 		  [_frame] "d" (frame),					\
 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);			\
-- 
cgit v1.2.3-59-g8ed1b


From 7868249fbbc8125b82b83d99d33b23897ae7d9ab Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 18:22:06 +0100
Subject: s390/test_unwind: add CALL_ON_STACK tests

Add CALL_ON_STACK helper testing. Tests make sure that we can unwind from
switched stack to original one up to task pt_regs (nodat -> task stack).

UWM_SWITCH_STACK could not be used together with UWM_THREAD because
get_stack_info explicitly restricts unwinding to task stack if
task != current.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/lib/test_unwind.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 2839f8cb691d..687a6922beda 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -43,7 +43,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 	int ret = 0;
 	char *bt;
 
-	bt = kmalloc(BT_BUF_SIZE, GFP_KERNEL);
+	bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC);
 	if (!bt) {
 		pr_err("failed to allocate backtrace buffer\n");
 		return -ENOMEM;
@@ -98,11 +98,12 @@ struct unwindme {
 };
 
 /* Values of unwindme.flags. */
-#define UWM_DEFAULT	0x0
-#define UWM_THREAD	0x1	/* Unwind a separate task. */
-#define UWM_REGS	0x2	/* Pass regs to test_unwind(). */
-#define UWM_SP		0x4	/* Pass sp to test_unwind(). */
-#define UWM_CALLER	0x8	/* Unwind starting from caller. */
+#define UWM_DEFAULT		0x0
+#define UWM_THREAD		0x1	/* Unwind a separate task. */
+#define UWM_REGS		0x2	/* Pass regs to test_unwind(). */
+#define UWM_SP			0x4	/* Pass sp to test_unwind(). */
+#define UWM_CALLER		0x8	/* Unwind starting from caller. */
+#define UWM_SWITCH_STACK	0x10	/* Use CALL_ON_STACK. */
 
 static __always_inline unsigned long get_psw_addr(void)
 {
@@ -146,7 +147,16 @@ static noinline int unwindme_func3(struct unwindme *u)
 /* This function must appear in the backtrace. */
 static noinline int unwindme_func2(struct unwindme *u)
 {
-	return unwindme_func3(u);
+	int rc;
+
+	if (u->flags & UWM_SWITCH_STACK) {
+		preempt_disable();
+		rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u);
+		preempt_enable();
+		return rc;
+	} else {
+		return unwindme_func3(u);
+	}
 }
 
 /* This function must follow unwindme_func2 in the backtrace. */
@@ -215,9 +225,11 @@ do {									\
 	TEST(UWM_DEFAULT);
 	TEST(UWM_SP);
 	TEST(UWM_REGS);
+	TEST(UWM_SWITCH_STACK);
 	TEST(UWM_SP | UWM_REGS);
 	TEST(UWM_CALLER | UWM_SP);
 	TEST(UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
 	TEST(UWM_THREAD);
 	TEST(UWM_THREAD | UWM_SP);
 	TEST(UWM_THREAD | UWM_CALLER | UWM_SP);
-- 
cgit v1.2.3-59-g8ed1b


From 0610154650f161d56a0bef0d9678ae1de7360019 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 18:52:40 +0100
Subject: s390/test_unwind: print verbose unwinding results

Add the stack name, sp and reliability information to the test unwinding
results. Also treat an ip outside of kernel text as a failure if the
state is reported as reliable.

Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/dumpstack.c |  1 +
 arch/s390/lib/test_unwind.c  | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index fc442aec0d96..d74e21a23703 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,6 +38,7 @@ const char *stack_type_name(enum stack_type type)
 		return "unknown";
 	}
 }
+EXPORT_SYMBOL_GPL(stack_type_name);
 
 static inline bool in_stack(unsigned long sp, struct stack_info *info,
 			    enum stack_type type, unsigned long low,
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 687a6922beda..db94e657c056 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -56,11 +56,19 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 		unsigned long addr = unwind_get_return_address(&state);
 		char sym[KSYM_SYMBOL_LEN];
 
-		if (!addr || frame_count == max_frames)
+		if (frame_count++ == max_frames)
 			break;
+		if (state.reliable && !addr) {
+			pr_err("unwind state reliable but addr is 0\n");
+			return -EINVAL;
+		}
 		sprint_symbol(sym, addr);
 		if (bt_pos < BT_BUF_SIZE) {
-			bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos, "%s\n", sym);
+			bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos,
+					   state.reliable ? " [%-7s%px] %pSR\n" :
+							    "([%-7s%px] %pSR)\n",
+					   stack_type_name(state.stack_info.type),
+					   (void *)state.sp, (void *)state.ip);
 			if (bt_pos >= BT_BUF_SIZE)
 				pr_err("backtrace buffer is too small\n");
 		}
-- 
cgit v1.2.3-59-g8ed1b


From e7409367abe54ad04868552b9d9fe4a56acc753d Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 22 Nov 2019 19:18:58 +0100
Subject: s390/test_unwind: add irq context tests

Add tests for unwinding from irq context. The unwinder should be able to
unwind through the irq stack to the task stack, up to the task pt_regs.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/lib/test_unwind.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index db94e657c056..72fa745281f0 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -11,6 +11,8 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/wait.h>
+#include <asm/irq.h>
+#include <asm/delay.h>
 
 #define BT_BUF_SIZE (PAGE_SIZE * 4)
 
@@ -100,11 +102,15 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
 /* State of the task being unwound. */
 struct unwindme {
 	int flags;
+	int ret;
+	struct task_struct *task;
 	struct completion task_ready;
 	wait_queue_head_t task_wq;
 	unsigned long sp;
 };
 
+static struct unwindme *unwindme;
+
 /* Values of unwindme.flags. */
 #define UWM_DEFAULT		0x0
 #define UWM_THREAD		0x1	/* Unwind a separate task. */
@@ -112,6 +118,7 @@ struct unwindme {
 #define UWM_SP			0x4	/* Pass sp to test_unwind(). */
 #define UWM_CALLER		0x8	/* Unwind starting from caller. */
 #define UWM_SWITCH_STACK	0x10	/* Use CALL_ON_STACK. */
+#define UWM_IRQ			0x20	/* Unwind from irq context. */
 
 static __always_inline unsigned long get_psw_addr(void)
 {
@@ -173,6 +180,34 @@ static noinline int unwindme_func1(void *u)
 	return unwindme_func2((struct unwindme *)u);
 }
 
+static void unwindme_irq_handler(struct ext_code ext_code,
+				       unsigned int param32,
+				       unsigned long param64)
+{
+	struct unwindme *u = READ_ONCE(unwindme);
+
+	if (u && u->task == current) {
+		unwindme = NULL;
+		u->task = NULL;
+		u->ret = unwindme_func1(u);
+	}
+}
+
+static int test_unwind_irq(struct unwindme *u)
+{
+	preempt_disable();
+	if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
+		pr_info("Couldn't reqister external interrupt handler");
+		return -1;
+	}
+	u->task = current;
+	unwindme = u;
+	udelay(1);
+	unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler);
+	preempt_enable();
+	return u->ret;
+}
+
 /* Spawns a task and passes it to test_unwind(). */
 static int test_unwind_task(struct unwindme *u)
 {
@@ -211,6 +246,8 @@ static int test_unwind_flags(int flags)
 	u.flags = flags;
 	if (u.flags & UWM_THREAD)
 		return test_unwind_task(&u);
+	else if (u.flags & UWM_IRQ)
+		return test_unwind_irq(&u);
 	else
 		return unwindme_func1(&u);
 }
@@ -241,6 +278,14 @@ do {									\
 	TEST(UWM_THREAD);
 	TEST(UWM_THREAD | UWM_SP);
 	TEST(UWM_THREAD | UWM_CALLER | UWM_SP);
+	TEST(UWM_IRQ);
+	TEST(UWM_IRQ | UWM_SWITCH_STACK);
+	TEST(UWM_IRQ | UWM_SP);
+	TEST(UWM_IRQ | UWM_REGS);
+	TEST(UWM_IRQ | UWM_SP | UWM_REGS);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
 #undef TEST
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From de6921ccbd0fb2882a1f615a6d3cdfbdcd64532c Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Mon, 25 Nov 2019 14:07:40 +0100
Subject: s390/test_unwind: add program check context tests

Add tests for unwinding from the program check handler. The unwinder
should be able to unwind through the pt_regs stored by the program check
handler on the task stack.

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/lib/test_unwind.c | 47 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 72fa745281f0..bda7ac0ddd29 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/kprobes.h>
 #include <linux/wait.h>
 #include <asm/irq.h>
 #include <asm/delay.h>
@@ -119,6 +120,7 @@ static struct unwindme *unwindme;
 #define UWM_CALLER		0x8	/* Unwind starting from caller. */
 #define UWM_SWITCH_STACK	0x10	/* Use CALL_ON_STACK. */
 #define UWM_IRQ			0x20	/* Unwind from irq context. */
+#define UWM_PGM			0x40	/* Unwind from program check handler. */
 
 static __always_inline unsigned long get_psw_addr(void)
 {
@@ -130,6 +132,17 @@ static __always_inline unsigned long get_psw_addr(void)
 	return psw_addr;
 }
 
+#ifdef CONFIG_KPROBES
+static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct unwindme *u = unwindme;
+
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+			     (u->flags & UWM_SP) ? u->sp : 0);
+	return 0;
+}
+#endif
+
 /* This function may or may not appear in the backtrace. */
 static noinline int unwindme_func4(struct unwindme *u)
 {
@@ -140,6 +153,34 @@ static noinline int unwindme_func4(struct unwindme *u)
 		wait_event(u->task_wq, kthread_should_park());
 		kthread_parkme();
 		return 0;
+#ifdef CONFIG_KPROBES
+	} else if (u->flags & UWM_PGM) {
+		struct kprobe kp;
+		int ret;
+
+		unwindme = u;
+		memset(&kp, 0, sizeof(kp));
+		kp.symbol_name = "do_report_trap";
+		kp.pre_handler = pgm_pre_handler;
+		ret = register_kprobe(&kp);
+		if (ret < 0) {
+			pr_err("register_kprobe failed %d\n", ret);
+			return -EINVAL;
+		}
+
+		/*
+		 * trigger specification exception
+		 */
+		asm volatile(
+			"	mvcl	%%r1,%%r1\n"
+			"0:	nopr	%%r7\n"
+			EX_TABLE(0b, 0b)
+			:);
+
+		unregister_kprobe(&kp);
+		unwindme = NULL;
+		return u->ret;
+#endif
 	} else {
 		struct pt_regs regs;
 
@@ -286,6 +327,12 @@ do {									\
 	TEST(UWM_IRQ | UWM_CALLER | UWM_SP);
 	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS);
 	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+#ifdef CONFIG_KPROBES
+	TEST(UWM_PGM);
+	TEST(UWM_PGM | UWM_SP);
+	TEST(UWM_PGM | UWM_REGS);
+	TEST(UWM_PGM | UWM_SP | UWM_REGS);
+#endif
 #undef TEST
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 222ee9087a730b1df08d09baed0d03626e67600f Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 27 Nov 2019 17:37:51 +0100
Subject: s390/unwind: start unwinding from reliable state

A comment in arch/s390/include/asm/unwind.h says:
> If 'first_frame' is not zero unwind_start skips unwind frames until it
> reaches the specified stack pointer.
> The end of the unwinding is indicated with unwind_done, this can be true
> right after unwind_start, e.g. with first_frame!=0 that can not be found.
> unwind_next_frame skips to the next frame.
> Once the unwind is completed unwind_error() can be used to check if there
> has been a situation where the unwinder could not correctly understand
> the tasks call chain.

With this change the backchain unwinder complies with the behaviour
described above, and also matches the orc unwinder implementation. The
unwinder now starts from a reliable state, i.e. either the stack frame
of __unwind_start itself or the stack frame generated by __switch_to
(ksp) is taken - both are known to be valid. If pt_regs are passed,
%r15 from pt_regs is a better match for the pt_regs psw than the
sometimes random "sp" passed by the caller.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/unwind.h |  6 +++---
 arch/s390/kernel/unwind_bc.c   | 42 ++++++++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 5d6c8fe7a271..de9006b0cfeb 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -58,11 +58,11 @@ static inline bool unwind_error(struct unwind_state *state)
 static inline void unwind_start(struct unwind_state *state,
 				struct task_struct *task,
 				struct pt_regs *regs,
-				unsigned long sp)
+				unsigned long first_frame)
 {
 	task = task ?: current;
-	sp = sp ?: get_stack_pointer(task, regs);
-	__unwind_start(state, task, regs, sp);
+	first_frame = first_frame ?: get_stack_pointer(task, regs);
+	__unwind_start(state, task, regs, first_frame);
 }
 
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index c5ebb8a4cdd6..e1371cdf9fa5 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -105,13 +105,11 @@ out_stop:
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
-		    struct pt_regs *regs, unsigned long sp)
+		    struct pt_regs *regs, unsigned long first_frame)
 {
 	struct stack_info *info = &state->stack_info;
-	unsigned long *mask = &state->stack_mask;
-	bool reliable;
 	struct stack_frame *sf;
-	unsigned long ip;
+	unsigned long ip, sp;
 
 	memset(state, 0, sizeof(*state));
 	state->task = task;
@@ -123,23 +121,28 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 		return;
 	}
 
+	/* Get the instruction pointer from pt_regs or the stack frame */
+	if (regs) {
+		ip = regs->psw.addr;
+		sp = regs->gprs[15];
+	} else if (task == current) {
+		sp = current_frame_address();
+	} else {
+		sp = task->thread.ksp;
+	}
+
 	/* Get current stack pointer and initialize stack info */
-	if (get_stack_info(sp, task, info, mask) != 0 ||
-	    !on_stack(info, sp, sizeof(struct stack_frame))) {
+	if (!update_stack_info(state, sp)) {
 		/* Something is wrong with the stack pointer */
 		info->type = STACK_TYPE_UNKNOWN;
 		state->error = true;
 		return;
 	}
 
-	/* Get the instruction pointer from pt_regs or the stack frame */
-	if (regs) {
-		ip = READ_ONCE_NOCHECK(regs->psw.addr);
-		reliable = true;
-	} else {
-		sf = (struct stack_frame *) sp;
+	if (!regs) {
+		/* Stack frame is within valid stack */
+		sf = (struct stack_frame *)sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
-		reliable = false;
 	}
 
 	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
@@ -147,6 +150,17 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	/* Update unwind state */
 	state->sp = sp;
 	state->ip = ip;
-	state->reliable = reliable;
+	state->reliable = true;
+
+	if (!first_frame)
+		return;
+	/* Skip through the call chain to the specified starting frame */
+	while (!unwind_done(state)) {
+		if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+			if (state->sp >= first_frame)
+				break;
+		}
+		unwind_next_frame(state);
+	}
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
-- 
cgit v1.2.3-59-g8ed1b


From bf018ee644897d7982e1b8dd8b15e97db6e1a4da Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 27 Nov 2019 18:12:04 +0100
Subject: s390/unwind: filter out unreliable bogus %r14

Currently the unwinder unconditionally returns %r14 from the first frame
pointed to by %r15 from pt_regs. A task could be interrupted when a
function has already allocated this frame (if it needs it) for its
callees or to store local variables. In that case this frame would
contain random values from the stack or values stored there by a callee.
As we are only interested in %r14 to get a potential return address,
skip bogus return addresses which don't belong to kernel text.

This helps to avoid duplicating filtering logic in unwinder users, most
of which use unwind_get_return_address() and would otherwise choke on
the bogus 0 address returned by it.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/unwind_bc.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index e1371cdf9fa5..ef42d5f77ce7 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -57,6 +57,11 @@ bool unwind_next_frame(struct unwind_state *state)
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
 		reliable = false;
 		regs = NULL;
+		if (!__kernel_text_address(ip)) {
+			/* skip bogus %r14 */
+			state->regs = NULL;
+			return unwind_next_frame(state);
+		}
 	} else {
 		sf = (struct stack_frame *) state->sp;
 		sp = READ_ONCE_NOCHECK(sf->back_chain);
-- 
cgit v1.2.3-59-g8ed1b


From be2d11b2a1e86586ace9f6839a159b170b00f2b3 Mon Sep 17 00:00:00 2001
From: Miroslav Benes <mbenes@suse.cz>
Date: Wed, 27 Nov 2019 19:35:19 +0100
Subject: s390/unwind: add stack pointer alignment sanity checks

The ABI requires SP to be aligned to 8 bytes; report an unwinding error
otherwise.

Link: https://lkml.kernel.org/r/20191106095601.29986-5-mbenes@suse.cz
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/dumpstack.c | 4 ++++
 arch/s390/kernel/unwind_bc.c | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index d74e21a23703..d306fe04489a 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -94,6 +94,10 @@ int get_stack_info(unsigned long sp, struct task_struct *task,
 	if (!sp)
 		goto unknown;
 
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto unknown;
+
 	/* Check per-task stack */
 	if (in_task_stack(sp, task, info))
 		goto recursion_check;
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index ef42d5f77ce7..da2d4d4c5b0e 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -92,6 +92,10 @@ bool unwind_next_frame(struct unwind_state *state)
 		}
 	}
 
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto out_err;
+
 	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
 
 	/* Update unwind state */
-- 
cgit v1.2.3-59-g8ed1b


From aa137a6d302b5989ed205b7dfb7fe40a8851babc Mon Sep 17 00:00:00 2001
From: Miroslav Benes <mbenes@suse.cz>
Date: Wed, 6 Nov 2019 10:56:01 +0100
Subject: s390/livepatch: Implement reliable stack tracing for the consistency
 model

The livepatch consistency model requires reliable stack tracing
architecture support in order to work properly. In order to achieve
this, two main issues have to be solved. First, reliable and consistent
call chain backtracing has to be ensured. Second, the unwinder needs to
be able to detect stack corruptions and return errors.

The "zSeries ELF Application Binary Interface Supplement" says:

  "The stack pointer points to the first word of the lowest allocated
  stack frame. If the "back chain" is implemented this word will point to
  the previously allocated stack frame (towards higher addresses), except
  for the first stack frame, which shall have a back chain of zero (NULL).
  The stack shall grow downwards, in other words towards lower addresses."

"back chain" is optional. GCC option -mbackchain enables it. Quoting
Martin Schwidefsky [1]:

  "The compiler is called with the -mbackchain option, all normal C
  function will store the backchain in the function prologue. All
  functions written in assembler code should do the same, if you find one
  that does not we should fix that. The end result is that a task that
  *voluntarily* called schedule() should have a proper backchain at all
  times.

  Dependent on the use case this may or may not be enough. Asynchronous
  interrupts may stop the CPU at the beginning of a function, if kernel
  preemption is enabled we can end up with a broken backchain.  The
  production kernels for IBM Z are all compiled *without* kernel
  preemption. So yes, we might get away without the objtool support.

  On a side-note, we do have a line item to implement the ORC unwinder for
  the kernel, that includes the objtool support. Once we have that we can
  drop the -mbackchain option for the kernel build. That gives us a nice
  little performance benefit. I hope that the change from backchain to the
  ORC unwinder will not be too hard to implement in the livepatch tools."

Since -mbackchain is enabled by default when the kernel is compiled, the
call chain backtracing should be currently ensured and objtool should
not be necessary for livepatch purposes.

Regarding the second issue, stack corruptions and non-reliable states
have to be recognized by the unwinder. Mainly this means that preemption
and page faults have to be detected, the end of the task stack must be
reached, return addresses must be valid text addresses, and hacks like
function graph tracing and kretprobes must be properly detected.

Unwinding a running task's stack is not a problem, because there is a
livepatch requirement that every checked task is blocked, except for the
current task. Due to that, the implementation can be much simpler
compared to the existing non-reliable infrastructure. We can consider a
task's kernel/thread stack only and skip the other stacks.

[1] 20180912121106.31ffa97c@mschwideX1 [not archived on lore.kernel.org]

Link: https://lkml.kernel.org/r/20191106095601.29986-5-mbenes@suse.cz
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/Kconfig             |  1 +
 arch/s390/kernel/stacktrace.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2528eb9d01fb..367a87c5d7b8 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -170,6 +170,7 @@ config S390
 	select HAVE_PERF_EVENTS
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index f8fc4f8aef9b..fc5419ac64c8 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -9,6 +9,7 @@
 #include <linux/stacktrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
+#include <asm/kprobes.h>
 
 void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 		     struct task_struct *task, struct pt_regs *regs)
@@ -22,3 +23,45 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 			break;
 	}
 }
+
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack.  Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	unwind_for_each_frame(&state, task, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK)
+			return -EINVAL;
+
+		if (state.regs)
+			return -EINVAL;
+
+		addr = unwind_get_return_address(&state);
+		if (!addr)
+			return -EINVAL;
+
+#ifdef CONFIG_KPROBES
+		/*
+		 * Mark stacktraces with kretprobed functions on them
+		 * as unreliable.
+		 */
+		if (state.ip == (unsigned long)kretprobe_trampoline)
+			return -EINVAL;
+#endif
+
+		if (!consume_entry(cookie, addr, false))
+			return -EINVAL;
+	}
+
+	/* Check for stack corruption */
+	if (unwind_error(&state))
+		return -EINVAL;
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 2115fbf7210bd053ba55a95e7ebc366df41aa9cf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Nov 2019 13:59:25 +0100
Subject: s390: remove compat vdso code

Remove the compat vdso code, since there is hardly any compat user space
left. Compat user space that still exists will have to use system calls
instead.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/Kconfig                        |   3 -
 arch/s390/Makefile                       |   1 -
 arch/s390/kernel/Makefile                |   1 -
 arch/s390/kernel/vdso.c                  |  42 +-------
 arch/s390/kernel/vdso32/.gitignore       |   1 -
 arch/s390/kernel/vdso32/Makefile         |  66 ------------
 arch/s390/kernel/vdso32/clock_getres.S   |  44 --------
 arch/s390/kernel/vdso32/clock_gettime.S  | 179 -------------------------------
 arch/s390/kernel/vdso32/getcpu.S         |  31 ------
 arch/s390/kernel/vdso32/gettimeofday.S   | 103 ------------------
 arch/s390/kernel/vdso32/note.S           |  13 ---
 arch/s390/kernel/vdso32/vdso32.lds.S     | 142 ------------------------
 arch/s390/kernel/vdso32/vdso32_wrapper.S |  15 ---
 13 files changed, 3 insertions(+), 638 deletions(-)
 delete mode 100644 arch/s390/kernel/vdso32/.gitignore
 delete mode 100644 arch/s390/kernel/vdso32/Makefile
 delete mode 100644 arch/s390/kernel/vdso32/clock_getres.S
 delete mode 100644 arch/s390/kernel/vdso32/clock_gettime.S
 delete mode 100644 arch/s390/kernel/vdso32/getcpu.S
 delete mode 100644 arch/s390/kernel/vdso32/gettimeofday.S
 delete mode 100644 arch/s390/kernel/vdso32/note.S
 delete mode 100644 arch/s390/kernel/vdso32/vdso32.lds.S
 delete mode 100644 arch/s390/kernel/vdso32/vdso32_wrapper.S

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 367a87c5d7b8..d4051e88e625 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -427,9 +427,6 @@ config COMPAT
 	  (and some other stuff like libraries and such) is needed for
 	  executing 31 bit applications.  It is safe to say "Y".
 
-config COMPAT_VDSO
-	def_bool COMPAT && !CC_IS_CLANG
-
 config SYSVIPC_COMPAT
 	def_bool y if COMPAT && SYSVIPC
 
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 478b645b20dd..ba8556bb0fb1 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -157,7 +157,6 @@ zfcpdump:
 
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
-	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7edbbcd8228a..2b1203cf7be6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -81,4 +81,3 @@ obj-$(CONFIG_TRACEPOINTS)	+= trace.o
 
 # vdso
 obj-y				+= vdso64/
-obj-$(CONFIG_COMPAT_VDSO)	+= vdso32/
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ed1fc08ccea2..bcc9bdb39ba2 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -29,13 +29,6 @@
 #include <asm/vdso.h>
 #include <asm/facility.h>
 
-#ifdef CONFIG_COMPAT_VDSO
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-#endif
-
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
 static unsigned int vdso64_pages;
@@ -55,12 +48,6 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
 
 	vdso_pagelist = vdso64_pagelist;
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	if (vma->vm_mm->context.compat_mm) {
-		vdso_pagelist = vdso32_pagelist;
-		vdso_pages = vdso32_pages;
-	}
-#endif
 
 	if (vmf->pgoff >= vdso_pages)
 		return VM_FAULT_SIGBUS;
@@ -76,10 +63,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	unsigned long vdso_pages;
 
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	if (vma->vm_mm->context.compat_mm)
-		vdso_pages = vdso32_pages;
-#endif
 
 	if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
 		return -EINVAL;
@@ -209,12 +192,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!vdso_enabled)
 		return 0;
 
+	if (is_compat_task())
+		return 0;
+
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	mm->context.compat_mm = is_compat_task();
-	if (mm->context.compat_mm)
-		vdso_pages = vdso32_pages;
-#endif
 	/*
 	 * vDSO has a problem and was disabled, just don't "enable" it for
 	 * the process
@@ -267,23 +248,6 @@ static int __init vdso_init(void)
 	int i;
 
 	vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT_VDSO
-	/* Calculate the size of the 32 bit vDSO */
-	vdso32_pages = ((&vdso32_end - &vdso32_start
-			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
-	/* Make sure pages are in the correct state */
-	vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
-				  GFP_KERNEL);
-	BUG_ON(vdso32_pagelist == NULL);
-	for (i = 0; i < vdso32_pages - 1; i++) {
-		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-		get_page(pg);
-		vdso32_pagelist[i] = pg;
-	}
-	vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
-	vdso32_pagelist[vdso32_pages] = NULL;
-#endif
 
 	/* Calculate the size of the 64 bit vDSO */
 	vdso64_pages = ((&vdso64_end - &vdso64_start
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
deleted file mode 100644
index e45fba9d0ced..000000000000
--- a/arch/s390/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index aee9ffbccb54..000000000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
-
-KCOV_INSTRUMENT := n
-
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
-
-KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_31 += -m31 -s
-
-KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-		    -Wl,--hash-style=both
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
-	$(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S FORCE
-	$(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
-quiet_cmd_vdso32as = VDSO32A $@
-      cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
deleted file mode 100644
index eaf9cf1417f6..000000000000
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_getres
-	.type  __kernel_clock_getres,@function
-__kernel_clock_getres:
-	CFI_STARTPROC
-	basr	%r1,0
-	la	%r1,4f-.(%r1)
-	chi	%r2,__CLOCK_REALTIME
-	je	0f
-	chi	%r2,__CLOCK_MONOTONIC
-	je	0f
-	la	%r1,5f-4f(%r1)
-	chi	%r2,__CLOCK_REALTIME_COARSE
-	je	0f
-	chi	%r2,__CLOCK_MONOTONIC_COARSE
-	jne	3f
-0:	ltr	%r3,%r3
-	jz	2f				/* res == NULL */
-1:	l	%r0,0(%r1)
-	xc	0(4,%r3),0(%r3)			/* set tp->tv_sec to zero */
-	st	%r0,4(%r3)			/* store tp->tv_usec */
-2:	lhi	%r2,0
-	br	%r14
-3:	lhi	%r1,__NR_clock_getres		/* fallback to svc */
-	svc	0
-	br	%r14
-	CFI_ENDPROC
-4:	.long	__CLOCK_REALTIME_RES
-5:	.long	__CLOCK_COARSE_RES
-	.size	__kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
deleted file mode 100644
index ada5c11a16e5..000000000000
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_gettime
-	.type  __kernel_clock_gettime,@function
-__kernel_clock_gettime:
-	CFI_STARTPROC
-	ahi	%r15,-16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	basr	%r5,0
-0:	al	%r5,21f-0b(%r5)			/* get &_vdso_data */
-	chi	%r2,__CLOCK_REALTIME_COARSE
-	je	10f
-	chi	%r2,__CLOCK_REALTIME
-	je	11f
-	chi	%r2,__CLOCK_MONOTONIC_COARSE
-	je	9f
-	chi	%r2,__CLOCK_MONOTONIC
-	jne	19f
-
-	/* CLOCK_MONOTONIC */
-1:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	1b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,1(%r15)
-	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,2f
-	ahi	%r0,-1
-2:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	lr	%r2,%r0
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	3f
-	a	%r0,__VDSO_TK_MULT(%r5)
-3:	alr	%r0,%r2
-	al	%r0,__VDSO_WTOM_NSEC(%r5)
-	al	%r1,__VDSO_WTOM_NSEC+4(%r5)
-	brc	12,5f
-	ahi	%r0,1
-5:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r2)			/*  >> tk->shift */
-	l	%r2,__VDSO_WTOM_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	1b
-	basr	%r5,0
-6:	ltr	%r0,%r0
-	jnz	7f
-	cl	%r1,20f-6b(%r5)
-	jl	8f
-7:	ahi	%r2,1
-	sl	%r1,20f-6b(%r5)
-	brc	3,6b
-	ahi	%r0,-1
-	j	6b
-8:	st	%r2,0(%r3)			/* store tp->tv_sec */
-	st	%r1,4(%r3)			/* store tp->tv_nsec */
-	lhi	%r2,0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* CLOCK_MONOTONIC_COARSE */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	9b
-	l	%r2,__VDSO_WTOM_CRS_SEC+4(%r5)
-	l	%r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	9b
-	j	8b
-
-	/* CLOCK_REALTIME_COARSE */
-10:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	10b
-	l	%r2,__VDSO_XTIME_CRS_SEC+4(%r5)
-	l	%r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	10b
-	j	17f
-
-	/* CLOCK_REALTIME */
-11:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	11b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,__VDSO_TS_END(%r5)	/* TOD steering end time */
-	s	%r0,1(%r15)			/* no - ts_steering_end */
-	sl	%r1,5(%r15)
-	brc	3,22f
-	ahi	%r0,-1
-22:	ltr	%r0,%r0				/* past end of steering? */
-	jm	24f
-	srdl	%r0,15				/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	23f
-	lcr	%r0,%r0				/* negative TOD offset */
-	lcr	%r1,%r1
-	je	23f
-	ahi	%r0,-1
-23:	a	%r0,1(%r15)			/* add TOD timestamp */
-	al	%r1,5(%r15)
-	brc	12,25f
-	ahi	%r0,1
-	j	25f
-24:	lm	%r0,%r1,1(%r15)			/* load TOD timestamp */
-25:	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,12f
-	ahi	%r0,-1
-12:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	lr	%r2,%r0
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	13f
-	a	%r0,__VDSO_TK_MULT(%r5)
-13:	alr	%r0,%r2
-	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
-	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
-	brc	12,14f
-	ahi	%r0,1
-14:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r2)			/*  >> tk->shift */
-	l	%r2,__VDSO_XTIME_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	11b
-	basr	%r5,0
-15:	ltr	%r0,%r0
-	jnz	16f
-	cl	%r1,20f-15b(%r5)
-	jl	17f
-16:	ahi	%r2,1
-	sl	%r1,20f-15b(%r5)
-	brc	3,15b
-	ahi	%r0,-1
-	j	15b
-17:	st	%r2,0(%r3)			/* store tp->tv_sec */
-	st	%r1,4(%r3)			/* store tp->tv_nsec */
-	lhi	%r2,0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* Fallback to system call */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-19:	lhi	%r1,__NR_clock_gettime
-	svc	0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-
-20:	.long	1000000000
-21:	.long	_vdso_data - 0b
-	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
deleted file mode 100644
index dc79e169f0ad..000000000000
--- a/arch/s390/kernel/vdso32/getcpu.S
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of getcpu() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2016
- *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_getcpu
-	.type  __kernel_getcpu,@function
-__kernel_getcpu:
-	CFI_STARTPROC
-	sacf	256
-	lm	%r4,%r5,__VDSO_GETCPU_VAL(%r0)
-	sacf	0
-	ltr	%r2,%r2
-	jz	2f
-	st	%r5,0(%r2)
-2:	ltr	%r3,%r3
-	jz	3f
-	st	%r4,0(%r3)
-3:	lhi	%r2,0
-	br	%r14
-	CFI_ENDPROC
-	.size	__kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index b23063fbc892..000000000000
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_gettimeofday
-	.type  __kernel_gettimeofday,@function
-__kernel_gettimeofday:
-	CFI_STARTPROC
-	ahi	%r15,-16
-	CFI_ADJUST_CFA_OFFSET 16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	basr	%r5,0
-0:	al	%r5,13f-0b(%r5)			/* get &_vdso_data */
-1:	ltr	%r3,%r3				/* check if tz is NULL */
-	je	2f
-	mvc	0(8,%r3),__VDSO_TIMEZONE(%r5)
-2:	ltr	%r2,%r2				/* check if tv is NULL */
-	je	10f
-	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	1b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,__VDSO_TS_END(%r5)	/* TOD steering end time */
-	s	%r0,1(%r15)
-	sl	%r1,5(%r15)
-	brc	3,14f
-	ahi	%r0,-1
-14:	ltr	%r0,%r0				/* past end of steering? */
-	jm	16f
-	srdl	%r0,15				/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	15f
-	lcr	%r0,%r0				/* negative TOD offset */
-	lcr	%r1,%r1
-	je	15f
-	ahi	%r0,-1
-15:	a	%r0,1(%r15)			/* add TOD timestamp */
-	al	%r1,5(%r15)
-	brc	12,17f
-	ahi	%r0,1
-	j	17f
-16:	lm	%r0,%r1,1(%r15)			/* load TOD timestamp */
-17:	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,3f
-	ahi	%r0,-1
-3:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	st	%r0,0(%r15)
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	4f
-	a	%r0,__VDSO_TK_MULT(%r5)
-4:	al	%r0,0(%r15)
-	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + xtime */
-	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
-	brc	12,5f
-	ahi	%r0,1
-5:	mvc	0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	1b
-	l	%r4,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r4)			/*  >> tk->shift */
-	l	%r4,0(%r15)			/* get tv_sec from stack */
-	basr	%r5,0
-6:	ltr	%r0,%r0
-	jnz	7f
-	cl	%r1,11f-6b(%r5)
-	jl	8f
-7:	ahi	%r4,1
-	sl	%r1,11f-6b(%r5)
-	brc	3,6b
-	ahi	%r0,-1
-	j	6b
-8:	st	%r4,0(%r2)			/* store tv->tv_sec */
-	ltr	%r1,%r1
-	m	%r0,12f-6b(%r5)
-	jnm	9f
-	al	%r0,12f-6b(%r5)
-9:	srl	%r0,6
-	st	%r0,4(%r2)			/* store tv->tv_usec */
-10:	slr	%r2,%r2
-	ahi	%r15,16
-	CFI_ADJUST_CFA_OFFSET -16
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-11:	.long	1000000000
-12:	.long	274877907
-13:	.long	_vdso_data - 0b
-	.size	__kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
deleted file mode 100644
index db19d0680a0a..000000000000
--- a/arch/s390/kernel/vdso32/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-	.long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index 721c4954cb6e..000000000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the infamous ld script for the 32 bits vdso
- * library
- */
-
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
-
-SECTIONS
-{
-	. = VDSO32_LBASE + SIZEOF_HEADERS;
-
-	.hash		: { *(.hash) }			:text
-	.gnu.hash	: { *(.gnu.hash) }
-	.dynsym		: { *(.dynsym) }
-	.dynstr		: { *(.dynstr) }
-	.gnu.version	: { *(.gnu.version) }
-	.gnu.version_d	: { *(.gnu.version_d) }
-	.gnu.version_r	: { *(.gnu.version_r) }
-
-	.note		: { *(.note.*) }		:text	:note
-
-	. = ALIGN(16);
-	.text		: {
-		*(.text .stub .text.* .gnu.linkonce.t.*)
-	} :text
-	PROVIDE(__etext = .);
-	PROVIDE(_etext = .);
-	PROVIDE(etext = .);
-
-	/*
-	 * Other stuff is appended to the text segment:
-	 */
-	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-	.rodata1	: { *(.rodata1) }
-
-	.dynamic	: { *(.dynamic) }		:text	:dynamic
-
-	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
-	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
-	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
-	.got ALIGN(8)	: { *(.got .toc) }
-
-	_end = .;
-	PROVIDE(end = .);
-
-	/*
-	 * Stabs debugging sections are here too.
-	 */
-	.stab	       0 : { *(.stab) }
-	.stabstr       0 : { *(.stabstr) }
-	.stab.excl     0 : { *(.stab.excl) }
-	.stab.exclstr  0 : { *(.stab.exclstr) }
-	.stab.index    0 : { *(.stab.index) }
-	.stab.indexstr 0 : { *(.stab.indexstr) }
-	.comment       0 : { *(.comment) }
-
-	/*
-	 * DWARF debug sections.
-	 * Symbols in the DWARF debugging sections are relative to the
-	 * beginning of the section so we begin them at 0.
-	 */
-	/* DWARF 1 */
-	.debug		0 : { *(.debug) }
-	.line		0 : { *(.line) }
-	/* GNU DWARF 1 extensions */
-	.debug_srcinfo	0 : { *(.debug_srcinfo) }
-	.debug_sfnames	0 : { *(.debug_sfnames) }
-	/* DWARF 1.1 and DWARF 2 */
-	.debug_aranges	0 : { *(.debug_aranges) }
-	.debug_pubnames 0 : { *(.debug_pubnames) }
-	/* DWARF 2 */
-	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
-	.debug_abbrev	0 : { *(.debug_abbrev) }
-	.debug_line	0 : { *(.debug_line) }
-	.debug_frame	0 : { *(.debug_frame) }
-	.debug_str	0 : { *(.debug_str) }
-	.debug_loc	0 : { *(.debug_loc) }
-	.debug_macinfo	0 : { *(.debug_macinfo) }
-	/* SGI/MIPS DWARF 2 extensions */
-	.debug_weaknames 0 : { *(.debug_weaknames) }
-	.debug_funcnames 0 : { *(.debug_funcnames) }
-	.debug_typenames 0 : { *(.debug_typenames) }
-	.debug_varnames  0 : { *(.debug_varnames) }
-	/* DWARF 3 */
-	.debug_pubtypes 0 : { *(.debug_pubtypes) }
-	.debug_ranges	0 : { *(.debug_ranges) }
-	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
-	. = ALIGN(PAGE_SIZE);
-	PROVIDE(_vdso_data = .);
-
-	/DISCARD/	: {
-		*(.note.GNU-stack)
-		*(.branch_lt)
-		*(.data .data.* .gnu.linkonce.d.* .sdata*)
-		*(.bss .sbss .dynbss .dynsbss)
-	}
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME	0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
-	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
-	note		PT_NOTE FLAGS(4);		/* PF_R */
-	eh_frame_hdr	PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-	VDSO_VERSION_STRING {
-	global:
-		/*
-		 * Has to be there for the kernel to find
-		 */
-		__kernel_gettimeofday;
-		__kernel_clock_gettime;
-		__kernel_clock_getres;
-		__kernel_getcpu;
-
-	local: *;
-	};
-}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
deleted file mode 100644
index de2fb930471a..000000000000
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-	__PAGE_ALIGNED_DATA
-
-	.globl vdso32_start, vdso32_end
-	.balign PAGE_SIZE
-vdso32_start:
-	.incbin "arch/s390/kernel/vdso32/vdso32.so"
-	.balign PAGE_SIZE
-vdso32_end:
-
-	.previous
-- 
cgit v1.2.3-59-g8ed1b