From 81d2c6f81996e01fbcd2b5aeefbb519e21c806e9 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 20 Aug 2019 12:49:40 +1000 Subject: kasan: support instrumented bitops combined with generic bitops Currently bitops-instrumented.h assumes that the architecture provides atomic, non-atomic and locking bitops (e.g. both set_bit and __set_bit). This is true on x86 and s390, but is not always true: there is a generic bitops/non-atomic.h header that provides generic non-atomic operations, and also a generic bitops/lock.h for locking operations. powerpc uses the generic non-atomic version, so it does not have it's own e.g. __set_bit that could be renamed arch___set_bit. Split up bitops-instrumented.h to mirror the atomic/non-atomic/lock split. This allows arches to only include the headers where they have arch-specific versions to rename. Update x86 and s390. (The generic operations are automatically instrumented because they're written in C, not asm.) Suggested-by: Christophe Leroy Reviewed-by: Christophe Leroy Signed-off-by: Daniel Axtens Acked-by: Marco Elver Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190820024941.12640-1-dja@axtens.net --- arch/s390/include/asm/bitops.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index eb7eed43e780..431e208a5ea4 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -241,7 +241,9 @@ static inline void arch___clear_bit_unlock(unsigned long nr, arch___clear_bit(nr, ptr); } -#include +#include +#include +#include /* * Functions which use MSB0 bit numbering. -- cgit v1.2.3-59-g8ed1b From 5b009673594d569674a9e0e60109f6a1723075b0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 4 Dec 2019 16:52:57 -0800 Subject: arch: ipcbuf.h: make uapi asm/ipcbuf.h self-contained Userspace cannot compile due to some missing type definitions. For example, building it for x86 fails as follows: CC usr/include/asm/ipcbuf.h.s In file included from usr/include/asm/ipcbuf.h:1:0, from :32: usr/include/asm-generic/ipcbuf.h:21:2: error: unknown type name `__kernel_key_t' __kernel_key_t key; ^~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:22:2: error: unknown type name `__kernel_uid32_t' __kernel_uid32_t uid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:23:2: error: unknown type name `__kernel_gid32_t' __kernel_gid32_t gid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:24:2: error: unknown type name `__kernel_uid32_t' __kernel_uid32_t cuid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:25:2: error: unknown type name `__kernel_gid32_t' __kernel_gid32_t cgid; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:26:2: error: unknown type name `__kernel_mode_t' __kernel_mode_t mode; ^~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:28:35: error: `__kernel_mode_t' undeclared here (not in a function) unsigned char __pad1[4 - sizeof(__kernel_mode_t)]; ^~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:31:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused1; ^~~~~~~~~~~~~~~~ usr/include/asm-generic/ipcbuf.h:32:2: error: unknown type name `__kernel_ulong_t' __kernel_ulong_t __unused2; ^~~~~~~~~~~~~~~~ It is just a matter of missing include directive. Include to make it self-contained, and add it to the compile-test coverage. Link: http://lkml.kernel.org/r/20191030063855.9989-1-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/uapi/asm/ipcbuf.h | 2 ++ arch/sparc/include/uapi/asm/ipcbuf.h | 2 ++ arch/xtensa/include/uapi/asm/ipcbuf.h | 2 ++ include/uapi/asm-generic/ipcbuf.h | 2 ++ usr/include/Makefile | 1 - 5 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h index 5b1c4f47c656..1030cd186899 100644 --- a/arch/s390/include/uapi/asm/ipcbuf.h +++ b/arch/s390/include/uapi/asm/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __S390_IPCBUF_H__ #define __S390_IPCBUF_H__ +#include + /* * The user_ipc_perm structure for S/390 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..5b933a598a33 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __SPARC_IPCBUF_H #define __SPARC_IPCBUF_H +#include + /* * The ipc64_perm structure for sparc/sparc64 architecture. * Note extra padding because this structure is passed back and forth diff --git a/arch/xtensa/include/uapi/asm/ipcbuf.h b/arch/xtensa/include/uapi/asm/ipcbuf.h index a57afa0b606f..3bd0642f6660 100644 --- a/arch/xtensa/include/uapi/asm/ipcbuf.h +++ b/arch/xtensa/include/uapi/asm/ipcbuf.h @@ -12,6 +12,8 @@ #ifndef _XTENSA_IPCBUF_H #define _XTENSA_IPCBUF_H +#include + /* * Pad space is left for: * - 32-bit mode_t and seq diff --git a/include/uapi/asm-generic/ipcbuf.h b/include/uapi/asm-generic/ipcbuf.h index 7d80dbd336fb..41a01b494fc7 100644 --- a/include/uapi/asm-generic/ipcbuf.h +++ b/include/uapi/asm-generic/ipcbuf.h @@ -2,6 +2,8 @@ #ifndef __ASM_GENERIC_IPCBUF_H #define __ASM_GENERIC_IPCBUF_H +#include + /* * The generic ipc64_perm structure: * Note extra padding because this structure is passed back and forth diff --git a/usr/include/Makefile b/usr/include/Makefile index 5acd9bb6d714..00cc763ec488 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -16,7 +16,6 @@ override c_flags = $(UAPI_CFLAGS) -Wp,-MD,$(depfile) -I$(objtree)/usr/include # Please consider to fix the header first. # # Sorted alphabetically. -header-test- += asm/ipcbuf.h header-test- += asm/msgbuf.h header-test- += asm/sembuf.h header-test- += asm/shmbuf.h -- cgit v1.2.3-59-g8ed1b From b62b6cf170237677ae96001940c54d5c09d0015a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 3 Dec 2019 17:52:03 +0100 Subject: s390/spinlock: remove confusing comment in arch_spin_lock_wait arch_spin_lock_wait does not take steal time into consideration. Signed-off-by: Vasily Gorbik --- arch/s390/lib/spinlock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index ce1e4bbe53aa..9b2dab5a69f9 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -242,7 +242,6 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) void arch_spin_lock_wait(arch_spinlock_t *lp) { - /* Use classic spinlocks + niai if the steal time is >= 10% */ if (test_cpu_flag(CIF_DEDICATED_CPU)) arch_spin_lock_queued(lp); else -- cgit v1.2.3-59-g8ed1b From 7e914fd17e9a750d8896e5645c84e75e52d77a4b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Dec 2019 09:02:15 +0000 Subject: s390/test_unwind: fix spelling mistake "reqister" -> "register" There is a spelling mistake in a pr_info message. Fix it. Link: https://lkml.kernel.org/r/20191202090215.28766-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Vasily Gorbik --- arch/s390/lib/test_unwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index bda7ac0ddd29..32b7a30b2485 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -238,7 +238,7 @@ static int test_unwind_irq(struct unwindme *u) { preempt_disable(); if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) { - pr_info("Couldn't reqister external interrupt handler"); + pr_info("Couldn't register external interrupt handler"); return -1; } u->task = current; -- cgit v1.2.3-59-g8ed1b From 39d4a501a9ef55c57b51e3ef07fc2aeed7f30b3b Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 28 Nov 2019 10:26:41 +0100 Subject: s390/cpum_sf: Adjust sampling interval to avoid hitting sample limits Function perf_event_ever_overflow() and perf_event_account_interrupt() are called every time samples are processed by the interrupt handler. However function perf_event_account_interrupt() has checks to avoid being flooded with interrupts (more then 1000 samples are received per task_tick). Samples are then dropped and a PERF_RECORD_THROTTLED is added to the perf data. The perf subsystem limit calculation is: maximum sample frequency := 100000 --> 1 samples per 10 us task_tick = 10ms = 10000us --> 1000 samples per task_tick The work flow is measurement_alert() uses SDBT head and each SBDT points to 511 SDB pages, each with 126 sample entries. After processing 8 SBDs and for each valid sample calling: perf_event_overflow() perf_event_account_interrupts() there is a considerable amount of samples being dropped, especially when the sample frequency is very high and near the 100000 limit. To avoid the high amount of samples being dropped near the end of a task_tick time frame, increment the sampling interval in case of dropped events. The CPU Measurement sampling facility on the s390 supports only intervals, specifiing how many CPU cycles have to be executed before a sample is generated. Increase the interval when the samples being generated hit the task_tick limit. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c07fdcd73726..3f6b02ee73ec 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1315,6 +1315,22 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "%s: " "overflows: sample %llu event %llu" -- cgit v1.2.3-59-g8ed1b From 0539ad0b22877225095d8adef0c376f52cc23834 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 29 Nov 2019 15:24:25 +0100 Subject: s390/cpum_sf: Avoid SBD overflow condition in irq handler The s390 CPU Measurement sampling facility has an overflow condition which fires when all entries in a SBD are used. The measurement alert interrupt is triggered and reads out all samples in this SDB. It then tests the successor SDB, if this SBD is not full, the interrupt handler does not read any samples at all from this SDB The design waits for the hardware to fill this SBD and then trigger another meassurement alert interrupt. This scheme works nicely until an perf_event_overflow() function call discards the sample due to a too high sampling rate. The interrupt handler has logic to read out a partially filled SDB when the perf event overflow condition in linux common code is met. This causes the CPUM sampling measurement hardware and the PMU device driver to operate on the same SBD's trailer entry. This should not happen. This can be seen here using this trace: cpumsf_pmu_add: tear:0xb5286000 hw_perf_event_update: sdbt 0xb5286000 full 1 over 0 flush_all:0 hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0 above shows 1. interrupt hw_perf_event_update: sdbt 0xb5286008 full 1 over 0 flush_all:0 hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0 above shows 2. interrupt ... this goes on fine until... hw_perf_event_update: sdbt 0xb5286068 full 1 over 0 flush_all:0 perf_push_sample1: overflow one or more samples read from the IRQ handler are rejected by perf_event_overflow() and the IRQ handler advances to the next SDB and modifies the trailer entry of a partially filled SDB. hw_perf_event_update: sdbt 0xb5286070 full 0 over 0 flush_all:1 timestamp: 14:32:52.519953 Next time the IRQ handler is called for this SDB the trailer entry shows an overflow count of 19 missed entries. hw_perf_event_update: sdbt 0xb5286070 full 1 over 19 flush_all:1 timestamp: 14:32:52.970058 Remove access to a follow on SDB when event overflow happened. Signed-off-by: Thomas Richter Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_sf.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3f6b02ee73ec..77d93c534284 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1303,12 +1303,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ -- cgit v1.2.3-59-g8ed1b From 157309a97015d8b32d65847d8a6d493d1a958e93 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 6 Dec 2019 08:27:51 +0100 Subject: s390/uv: use EOPNOTSUPP instead of ENOTSUPP ENOTSUP is just an internal kernel error and should never reach userspace. The return value of the share function is not exported to userspace, but to avoid giving bad examples let us use EOPNOTSUPP: Suggested-by: Heiko Carstens Acked-by: Janosch Frank Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index ef3c00b049ab..4093a2856929 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -86,7 +86,7 @@ static inline int share(unsigned long addr, u16 cmd) }; if (!is_prot_virt_guest()) - return -ENOTSUPP; + return -EOPNOTSUPP; /* * Sharing is page wise, if we encounter addresses that are * not page aligned, we assume something went wrong. If -- cgit v1.2.3-59-g8ed1b From 1b68ac8678a8e9993deebd55014cbe803e78ca02 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 29 Nov 2019 12:59:59 +0100 Subject: s390: remove last diag 0x44 caller diag 0x44 is a voluntary undirected yield of a virtual CPU. This has caused a lot of performance issues in the past. There is only one caller left, and that one is only executed if diag 0x9c (directed yield) is not present. Given that all hypervisors implement diag 0x9c anyway, remove the last diag 0x44 to avoid that more callers will be added. Worst case that could happen now, if diag 0x9c is not present, is that a virtual CPU would loop a bit instead of giving its time slice up. diag 0x44 statistics in debugfs are kept and will always be zero, so that user space can tell that there are no calls. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/setup.h | 2 -- arch/s390/kernel/early.c | 16 ---------------- arch/s390/kernel/smp.c | 13 +++++-------- 3 files changed, 5 insertions(+), 26 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6dc6c4fbc8e2..69289e99cabd 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -27,7 +27,6 @@ #define MACHINE_FLAG_DIAG9C BIT(3) #define MACHINE_FLAG_ESOP BIT(4) #define MACHINE_FLAG_IDTE BIT(5) -#define MACHINE_FLAG_DIAG44 BIT(6) #define MACHINE_FLAG_EDAT1 BIT(7) #define MACHINE_FLAG_EDAT2 BIT(8) #define MACHINE_FLAG_TOPOLOGY BIT(10) @@ -94,7 +93,6 @@ extern unsigned long __swsusp_reset_dma; #define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) #define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP) #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) #define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index db32a55daaec..cd241ee66eff 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -204,21 +204,6 @@ static __init void detect_diag9c(void) S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; } -static __init void detect_diag44(void) -{ - int rc; - - diag_stat_inc(DIAG_STAT_X044); - asm volatile( - " diag 0,0,0x44\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; -} - static __init void detect_machine_facilities(void) { if (test_facility(8)) { @@ -331,7 +316,6 @@ void __init startup_init(void) setup_arch_string(); setup_boot_command_line(); detect_diag9c(); - detect_diag44(); detect_machine_facilities(); save_vector_registers(); setup_topology(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2794cad9312e..a08bd2522dd9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -413,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) { - diag_stat_inc_norecursion(DIAG_STAT_X09C); - asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { - diag_stat_inc_norecursion(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); - } + if (!MACHINE_HAS_DIAG9C) + return; + diag_stat_inc_norecursion(DIAG_STAT_X09C); + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); } /* -- cgit v1.2.3-59-g8ed1b From 3e39ce266a273e5751e5792aee248cb8d9630c8b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 2 Aug 2019 12:42:59 +0200 Subject: s390/kasan: add KASAN_VMALLOC support Add KASAN_VMALLOC support which now enables vmalloc memory area access checks as well as enables usage of VMAP_STACK under kasan. KASAN_VMALLOC changes the way vmalloc and modules areas shadow memory is handled. With this new approach only top level page tables are pre-populated and lower levels are filled dynamically upon memory allocation. Acked-by: Ilya Leoshkevich Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/mm/kasan_init.c | 68 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 12 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d4051e88e625..bc88841d335d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -124,6 +124,7 @@ config S390 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN + select HAVE_ARCH_KASAN_VMALLOC select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 460f25572940..06345616a646 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -82,7 +82,8 @@ static pte_t * __init kasan_early_pte_alloc(void) enum populate_mode { POPULATE_ONE2ONE, POPULATE_MAP, - POPULATE_ZERO_SHADOW + POPULATE_ZERO_SHADOW, + POPULATE_SHALLOW }; static void __init kasan_early_vmemmap_populate(unsigned long address, unsigned long end, @@ -116,6 +117,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgd_populate(&init_mm, pg_dir, p4_dir); } + if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && + mode == POPULATE_SHALLOW) { + address = (address + P4D_SIZE) & P4D_MASK; + continue; + } + p4_dir = p4d_offset(pg_dir, address); if (p4d_none(*p4_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -130,6 +137,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, p4d_populate(&init_mm, p4_dir, pu_dir); } + if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) && + mode == POPULATE_SHALLOW) { + address = (address + PUD_SIZE) & PUD_MASK; + continue; + } + pu_dir = pud_offset(p4_dir, address); if (pud_none(*pu_dir)) { if (mode == POPULATE_ZERO_SHADOW && @@ -195,6 +208,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, page = kasan_early_shadow_page; pte_val(*pt_dir) = __pa(page) | pgt_prot_zero; break; + case POPULATE_SHALLOW: + /* should never happen */ + break; } } address += PAGE_SIZE; @@ -313,22 +329,50 @@ void __init kasan_early_init(void) init_mm.pgd = early_pg_dir; /* * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * | 1:1 ram mapping | /| 1/8 ram | - * +- end of ram ----+ / +----------------+ - * | ... gap ... |/ | kasan | - * +- shadow start --+ | zero | - * | 1/8 addr space | | page | - * +- shadow end -+ | mapping | - * | ... gap ... |\ | (untracked) | - * +- modules vaddr -+ \ +----------------+ - * | 2Gb | \| unmapped | allocated per module - * +-----------------+ +- shadow end ---+ + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * | | / | | + * +- end of ram ----+ / +----------------+ + * | ... gap ... | / | | + * | |/ | kasan | + * +- shadow start --+ | zero | + * | 1/8 addr space | | page | + * +- shadow end -+ | mapping | + * | ... gap ... |\ | (untracked) | + * +- vmalloc area -+ \ | | + * | vmalloc_size | \ | | + * +- modules vaddr -+ \ +----------------+ + * | 2Gb | \| unmapped | allocated per module + * +-----------------+ +- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * | 1:1 ram mapping | /| 1/8 ram | + * | | / | | + * +- end of ram ----+ / +----------------+ + * | ... gap ... | / | kasan | + * | |/ | zero | + * +- shadow start --+ | page | + * | 1/8 addr space | | mapping | + * +- shadow end -+ | (untracked) | + * | ... gap ... |\ | | + * +- vmalloc area -+ \ +- vmalloc area -+ + * | vmalloc_size | \ |shallow populate| + * +- modules vaddr -+ \ +- modules area -+ + * | 2Gb | \|shallow populate| + * +-----------------+ +- shadow end ---+ */ /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) untracked_mem_end = vmax - MODULES_LEN; + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_mem_end = vmax - vmalloc_size - MODULES_LEN; + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_early_vmemmap_populate(__sha(untracked_mem_end), + __sha(vmax), POPULATE_SHALLOW); + } + /* populate kasan shadow for untracked memory */ kasan_early_vmemmap_populate(__sha(max_physmem_end), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); -- cgit v1.2.3-59-g8ed1b From 6feeee8efc53035c3195b02068b58ae947538aa4 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Dec 2019 09:03:12 +0100 Subject: s390/ftrace: fix endless recursion in function_graph tracer The following sequence triggers a kernel stack overflow on s390x: mount -t tracefs tracefs /sys/kernel/tracing cd /sys/kernel/tracing echo function_graph > current_tracer [crash] This is because preempt_count_{add,sub} are in the list of traced functions, which can be demonstrated by: echo preempt_count_add >set_ftrace_filter echo function_graph > current_tracer [crash] The stack overflow happens because get_tod_clock_monotonic() gets called by ftrace but itself calls preempt_{disable,enable}(), which leads to a endless recursion. Fix this by using preempt_{disable,enable}_notrace(). Fixes: 011620688a71 ("s390/time: ensure get_clock_monotonic() returns monotonic values") Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/timex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 6da8885251d6..670f14a228e5 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -194,9 +194,9 @@ static inline unsigned long long get_tod_clock_monotonic(void) { unsigned long long tod; - preempt_disable(); + preempt_disable_notrace(); tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; - preempt_enable(); + preempt_enable_notrace(); return tod; } -- cgit v1.2.3-59-g8ed1b From cd92ac253063981c96abfc3fdb326e430bd89ea6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Dec 2019 21:53:04 +0100 Subject: s390/purgatory: Make sure we fail the build if purgatory has missing symbols Since we link purgatory with -r aka we enable "incremental linking" no checks for unresolved symbols are done while linking the purgatory. This commit adds an extra check for unresolved symbols by calling ld without -r before running objcopy to generate purgatory.ro. This will help us catch missing symbols in the purgatory sooner. Note this commit also removes --no-undefined from LDFLAGS_purgatory as that has no effect. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/lkml/20191212205304.191610-1-hdegoede@redhat.com Tested-by: Philipp Rudo Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/purgatory/.gitignore | 1 + arch/s390/purgatory/Makefile | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore index 04a03433c720..c82157f46b18 100644 --- a/arch/s390/purgatory/.gitignore +++ b/arch/s390/purgatory/.gitignore @@ -1,3 +1,4 @@ purgatory +purgatory.chk purgatory.lds purgatory.ro diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bc0d7a0d0394..13e9a5dc0a07 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y purgatory-y := head.o purgatory.o string.o sha256.o mem.o -targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro +targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE @@ -26,15 +26,22 @@ KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) -LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T +# Since we link purgatory with -r unresolved symbols are not checked, so we +# also link a purgatory.chk binary without -r to check for unresolved symbols. +PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib +LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T +LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS) $(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE $(call if_changed,ld) +$(obj)/purgatory.chk: $(obj)/purgatory FORCE + $(call if_changed,ld) + OBJCOPYFLAGS_purgatory.ro := -O elf64-s390 OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*' OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment' OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*' -$(obj)/purgatory.ro: $(obj)/purgatory FORCE +$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE $(call if_changed,objcopy) $(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE -- cgit v1.2.3-59-g8ed1b From c23587c92f6e3260fe3b82bb75b38aa2553b9468 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 18 Dec 2019 09:34:57 +0100 Subject: s390/purgatory: do not build purgatory with kcov, kasan and friends the purgatory must not rely on functions from the "old" kernel, so we must disable kasan and friends. We also need to have a separate copy of string.c as the default does not build memcmp with KASAN. Reported-by: kbuild test robot Signed-off-by: Christian Borntraeger Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/purgatory/Makefile | 6 ++++-- arch/s390/purgatory/string.c | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 arch/s390/purgatory/string.c (limited to 'arch/s390') diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 13e9a5dc0a07..c57f8c40e992 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -15,8 +15,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE - $(call if_changed_rule,cc_o_c) +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c new file mode 100644 index 000000000000..c98c22a72db7 --- /dev/null +++ b/arch/s390/purgatory/string.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define __HAVE_ARCH_MEMCMP /* arch function */ +#include "../lib/string.c" -- cgit v1.2.3-59-g8ed1b From eef06cbf670aaa2ccb56c9a7b84042acd657aa5d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2019 17:27:31 +0100 Subject: s390/unwind: stop gracefully at user mode pt_regs in irq stack Consider reaching user mode pt_regs at the bottom of irq stack graceful unwinder termination. This is the case when irq/mcck/ext interrupt arrives while in user mode. Signed-off-by: Vasily Gorbik --- arch/s390/kernel/unwind_bc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index da2d4d4c5b0e..707fd99f6734 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -36,10 +36,17 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp) return true; } -static inline bool is_task_pt_regs(struct unwind_state *state, - struct pt_regs *regs) +static inline bool is_final_pt_regs(struct unwind_state *state, + struct pt_regs *regs) { - return task_pt_regs(state->task) == regs; + /* user mode or kernel thread pt_regs at the bottom of task stack */ + if (task_pt_regs(state->task) == regs) + return true; + + /* user mode pt_regs at the bottom of irq stack */ + return state->stack_info.type == STACK_TYPE_IRQ && + state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs && + READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE; } bool unwind_next_frame(struct unwind_state *state) @@ -80,7 +87,7 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_err; regs = (struct pt_regs *) sp; - if (is_task_pt_regs(state, regs)) + if (is_final_pt_regs(state, regs)) goto out_stop; ip = READ_ONCE_NOCHECK(regs->psw.addr); sp = READ_ONCE_NOCHECK(regs->gprs[15]); -- cgit v1.2.3-59-g8ed1b From b4adfe55915d8363e244e42386d69567db1719b9 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2019 13:50:23 +0100 Subject: s390/ftrace: save traced function caller A typical backtrace acquired from ftraced function currently looks like the following (e.g. for "path_openat"): arch_stack_walk+0x15c/0x2d8 stack_trace_save+0x50/0x68 stack_trace_call+0x15a/0x3b8 ftrace_graph_caller+0x0/0x1c 0x3e0007e3c98 <- ftraced function caller (should be do_filp_open+0x7c/0xe8) do_open_execat+0x70/0x1b8 __do_execve_file.isra.0+0x7d8/0x860 __s390x_sys_execve+0x56/0x68 system_call+0xdc/0x2d8 Note random "0x3e0007e3c98" stack value as ftraced function caller. This value causes either imprecise unwinder result or unwinding failure. That "0x3e0007e3c98" comes from r14 of ftraced function stack frame, which it haven't had a chance to initialize since the very first instruction calls ftrace code ("ftrace_caller"). (ftraced function might never save r14 as well). Nevertheless according to s390 ABI any function is called with stack frame allocated for it and r14 contains return address. "ftrace_caller" itself is called with "brasl %r0,ftrace_caller". So, to fix this issue simply always save traced function caller onto ftraced function stack frame. Reported-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/mcount.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9e1660a6b9db..c3597d2e2ae0 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -35,6 +35,7 @@ EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller lgr %r1,%r15 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP -- cgit v1.2.3-59-g8ed1b