From 2f0e8aae26a27fe73d033788f8e92188e7584f41 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Sun, 24 Jul 2022 15:02:16 +0200 Subject: s390/mm: rework memcpy_real() to avoid DAT-off mode Function memcpy_real() is an univeral data mover that does not require DAT mode to be able reading from a physical address. Its advantage is an ability to read from any address, even those for which no kernel virtual mapping exists. Although memcpy_real() is interrupt-safe, there are no handlers that make use of this function. The compiler instrumentation have to be disabled and separate no-DAT stack used to allow execution of the function once DAT mode is disabled. Rework memcpy_real() to overcome these shortcomings. As result, data copying (which is primarily reading out a crashed system memory by a user process) is executed on a regular stack with enabled interrupts. Also, use of memcpy_real_buf swap buffer becomes unnecessary and the swapping is eliminated. The above is achieved by using a fixed virtual address range that spans a single page and remaps that page repeatedly when memcpy_real() is called for a particular physical address. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 5 +- arch/s390/include/asm/maccess.h | 14 +++++ arch/s390/include/asm/pgtable.h | 1 + arch/s390/include/asm/processor.h | 2 - arch/s390/kernel/crash_dump.c | 25 +-------- arch/s390/kernel/setup.c | 3 +- arch/s390/mm/maccess.c | 107 ++++++++++++++++---------------------- arch/s390/mm/vmem.c | 2 +- 8 files changed, 70 insertions(+), 89 deletions(-) create mode 100644 arch/s390/include/asm/maccess.h (limited to 'arch/s390') diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 41b7af7a9365..6e7f01ca53e6 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -17,6 +17,7 @@ unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__abs_lowcore); +unsigned long __bootdata_preserved(__memcpy_real_area); unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); @@ -182,7 +183,9 @@ static void setup_kernel_memory_layout(void) /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - __abs_lowcore = round_down(vmax - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE); + __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, + sizeof(struct lowcore)); MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h new file mode 100644 index 000000000000..21ebd9bf34cc --- /dev/null +++ b/arch/s390/include/asm/maccess.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_S390_MACCESS_H +#define __ASM_S390_MACCESS_H + +#include + +struct iov_iter; + +extern unsigned long __memcpy_real_area; +void memcpy_real_init(void); +size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count); +int memcpy_real(void *dest, unsigned long src, size_t count); + +#endif /* __ASM_S390_MACCESS_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 45617c966202..f1cb9391190d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1780,6 +1780,7 @@ extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot); extern void vmem_unmap_4k_page(unsigned long addr); +extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 93677ae89e7e..4eb9b7875e13 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -306,8 +306,6 @@ static __always_inline void __noreturn disabled_wait(void) #define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL -extern int memcpy_real(void *, unsigned long, size_t); - extern int s390_isolate_bp(void); extern int s390_isolate_bp_guest(void); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index bad8f47fc5d6..438fe696a4a3 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -21,6 +21,7 @@ #include #include #include +#include #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) @@ -53,8 +54,6 @@ struct save_area { }; static LIST_HEAD(dump_save_areas); -static DEFINE_MUTEX(memcpy_real_mutex); -static char memcpy_real_buf[PAGE_SIZE]; /* * Allocate a save area @@ -116,26 +115,6 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); } -static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count) -{ - size_t len, copied, res = 0; - - mutex_lock(&memcpy_real_mutex); - while (count) { - len = min(PAGE_SIZE, count); - if (memcpy_real(memcpy_real_buf, src, len)) - break; - copied = copy_to_iter(memcpy_real_buf, len, iter); - count -= copied; - src += copied; - res += copied; - if (copied < len) - break; - } - mutex_unlock(&memcpy_real_mutex); - return res; -} - size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) { size_t len, copied, res = 0; @@ -156,7 +135,7 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) } else { len = count; } - copied = copy_to_iter_real(iter, src, len); + copied = memcpy_real_iter(iter, src, len); } count -= copied; src += copied; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d1390899a8e7..ab19ddb09d65 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include "entry.h" @@ -1050,7 +1051,7 @@ void __init setup_arch(char **cmdline_p) * Create kernel page tables and switch to virtual addressing. */ paging_init(); - + memcpy_real_init(); /* * After paging_init created the kernel page table, the new PSWs * in lowcore can now run with DAT enabled. diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index b8451ddbb3d6..bd1bcbb02938 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,12 +12,17 @@ #include #include #include +#include #include #include #include #include #include +unsigned long __bootdata_preserved(__memcpy_real_area); +static __ro_after_init pte_t *memcpy_real_ptep; +static DEFINE_MUTEX(memcpy_real_mutex); + static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long aligned, offset, count; @@ -77,75 +82,55 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size) return dst; } -static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) +void __init memcpy_real_init(void) { - union register_pair _dst, _src; - int rc = -EFAULT; - - _dst.even = (unsigned long) dest; - _dst.odd = (unsigned long) count; - _src.even = (unsigned long) src; - _src.odd = (unsigned long) count; - asm volatile ( - "0: mvcle %[dst],%[src],0\n" - "1: jo 0b\n" - " lhi %[rc],0\n" - "2:\n" - EX_TABLE(1b,2b) - : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair) - : : "cc", "memory"); - return rc; + memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true); + if (!memcpy_real_ptep) + panic("Couldn't setup memcpy real area"); } -static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, - unsigned long src, - unsigned long count) +size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count) { - int irqs_disabled, rc; - unsigned long flags; - - if (!count) - return 0; - flags = arch_local_irq_save(); - irqs_disabled = arch_irqs_disabled_flags(flags); - if (!irqs_disabled) - trace_hardirqs_off(); - __arch_local_irq_stnsm(0xf8); // disable DAT - rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); - if (flags & PSW_MASK_DAT) - __arch_local_irq_stosm(0x04); // enable DAT - if (!irqs_disabled) - trace_hardirqs_on(); - __arch_local_irq_ssm(flags); - return rc; + size_t len, copied, res = 0; + unsigned long phys, offset; + void *chunk; + pte_t pte; + + while (count) { + phys = src & PAGE_MASK; + offset = src & ~PAGE_MASK; + chunk = (void *)(__memcpy_real_area + offset); + len = min(count, PAGE_SIZE - offset); + pte = mk_pte_phys(phys, PAGE_KERNEL_RO); + + mutex_lock(&memcpy_real_mutex); + if (pte_val(pte) != pte_val(*memcpy_real_ptep)) { + __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL); + set_pte(memcpy_real_ptep, pte); + } + copied = copy_to_iter(chunk, len, iter); + mutex_unlock(&memcpy_real_mutex); + + count -= copied; + src += copied; + res += copied; + if (copied < len) + break; + } + return res; } -/* - * Copy memory in real mode (kernel to kernel) - */ int memcpy_real(void *dest, unsigned long src, size_t count) { - unsigned long _dest = (unsigned long)dest; - unsigned long _src = (unsigned long)src; - unsigned long _count = (unsigned long)count; - int rc; - - if (S390_lowcore.nodat_stack != 0) { - preempt_disable(); - rc = call_on_stack(3, S390_lowcore.nodat_stack, - unsigned long, _memcpy_real, - unsigned long, _dest, - unsigned long, _src, - unsigned long, _count); - preempt_enable(); - return rc; - } - /* - * This is a really early memcpy_real call, the stacks are - * not set up yet. Just call _memcpy_real on the early boot - * stack - */ - return _memcpy_real(_dest, _src, _count); + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dest; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (memcpy_real_iter(&iter, src, count) < count) + return -EFAULT; + return 0; } /* diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index a50a809e024f..ee1a97078527 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -567,7 +567,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) * while traversing is an error, since the function is expected to be * called against virtual regions reserverd for 4KB mappings only. */ -static pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) +pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) { pte_t *ptep = NULL; pgd_t *pgd; -- cgit v1.2.3-59-g8ed1b