diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/dump_pagetables.c | 20 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 17 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 2 | ||||
-rw-r--r-- | arch/s390/mm/maccess.c | 175 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 104 |
5 files changed, 207 insertions, 111 deletions
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 9f9af5298dd6..9953819d7959 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -8,8 +8,10 @@ #include <linux/kasan.h> #include <asm/ptdump.h> #include <asm/kasan.h> +#include <asm/abs_lowcore.h> #include <asm/nospec-branch.h> #include <asm/sections.h> +#include <asm/maccess.h> static unsigned long max_addr; @@ -21,6 +23,8 @@ struct addr_marker { enum address_markers_idx { IDENTITY_BEFORE_NR = 0, IDENTITY_BEFORE_END_NR, + AMODE31_START_NR, + AMODE31_END_NR, KERNEL_START_NR, KERNEL_END_NR, #ifdef CONFIG_KFENCE @@ -39,11 +43,17 @@ enum address_markers_idx { VMALLOC_END_NR, MODULES_NR, MODULES_END_NR, + ABS_LOWCORE_NR, + ABS_LOWCORE_END_NR, + MEMCPY_REAL_NR, + MEMCPY_REAL_END_NR, }; static struct addr_marker address_markers[] = { [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, + [AMODE31_START_NR] = {0, "Amode31 Area Start"}, + [AMODE31_END_NR] = {0, "Amode31 Area End"}, [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, #ifdef CONFIG_KFENCE @@ -62,6 +72,10 @@ static struct addr_marker address_markers[] = { [VMALLOC_END_NR] = {0, "vmalloc Area End"}, [MODULES_NR] = {0, "Modules Area Start"}, [MODULES_END_NR] = {0, "Modules Area End"}, + [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, + [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, + [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, + [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, { -1, NULL } }; @@ -276,8 +290,14 @@ static int pt_dump_init(void) max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; + address_markers[AMODE31_START_NR].start_address = __samode31; + address_markers[AMODE31_END_NR].start_address = __eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[MODULES_END_NR].start_address = MODULES_END; + address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; + address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE; + address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area; + address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 09b6e756d521..9649d9382e0a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -268,8 +268,7 @@ static noinline void do_sigbus(struct pt_regs *regs) (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } -static noinline void do_fault_error(struct pt_regs *regs, int access, - vm_fault_t fault) +static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault) { int si_code; @@ -421,8 +420,6 @@ retry: if (unlikely(!(vma->vm_flags & access))) goto out_up; - if (is_vm_hugetlb_page(vma)) - address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -518,7 +515,7 @@ void do_protection_exception(struct pt_regs *regs) fault = do_exception(regs, access); } if (unlikely(fault)) - do_fault_error(regs, access, fault); + do_fault_error(regs, fault); } NOKPROBE_SYMBOL(do_protection_exception); @@ -530,7 +527,7 @@ void do_dat_exception(struct pt_regs *regs) access = VM_ACCESS_FLAGS; fault = do_exception(regs, access); if (unlikely(fault)) - do_fault_error(regs, access, fault); + do_fault_error(regs, fault); } NOKPROBE_SYMBOL(do_dat_exception); @@ -805,7 +802,7 @@ void do_secure_storage_access(struct pt_regs *regs) addr = __gmap_translate(gmap, addr); mmap_read_unlock(mm); if (IS_ERR_VALUE(addr)) { - do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); break; } fallthrough; @@ -815,7 +812,7 @@ void do_secure_storage_access(struct pt_regs *regs) vma = find_vma(mm, addr); if (!vma) { mmap_read_unlock(mm); - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); break; } page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); @@ -838,7 +835,7 @@ void do_secure_storage_access(struct pt_regs *regs) BUG(); break; default: - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); WARN_ON_ONCE(1); } } @@ -850,7 +847,7 @@ void do_non_secure_storage_access(struct pt_regs *regs) struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; if (get_fault_type(regs) != GMAP_FAULT) { - do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + do_fault_error(regs, VM_FAULT_BADMAP); WARN_ON_ONCE(1); return; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 4a154a084966..97d66a3e60fb 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -37,7 +37,7 @@ #include <asm/kfence.h> #include <asm/ptdump.h> #include <asm/dma.h> -#include <asm/lowcore.h> +#include <asm/abs_lowcore.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/sections.h> diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d6d84e02f35a..1571cdcb0c50 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -12,10 +12,17 @@ #include <linux/errno.h> #include <linux/gfp.h> #include <linux/cpu.h> +#include <linux/uio.h> #include <asm/asm-extable.h> #include <asm/ctl_reg.h> #include <asm/io.h> +#include <asm/abs_lowcore.h> #include <asm/stacktrace.h> +#include <asm/maccess.h> + +unsigned long __bootdata_preserved(__memcpy_real_area); +static __ro_after_init pte_t *memcpy_real_ptep; +static DEFINE_MUTEX(memcpy_real_mutex); static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) { @@ -76,118 +83,72 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size) return dst; } -static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) +void __init memcpy_real_init(void) { - union register_pair _dst, _src; - int rc = -EFAULT; - - _dst.even = (unsigned long) dest; - _dst.odd = (unsigned long) count; - _src.even = (unsigned long) src; - _src.odd = (unsigned long) count; - asm volatile ( - "0: mvcle %[dst],%[src],0\n" - "1: jo 0b\n" - " lhi %[rc],0\n" - "2:\n" - EX_TABLE(1b,2b) - : [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair) - : : "cc", "memory"); - return rc; -} - -static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, - unsigned long src, - unsigned long count) -{ - int irqs_disabled, rc; - unsigned long flags; - - if (!count) - return 0; - flags = arch_local_irq_save(); - irqs_disabled = arch_irqs_disabled_flags(flags); - if (!irqs_disabled) - trace_hardirqs_off(); - __arch_local_irq_stnsm(0xf8); // disable DAT - rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); - if (flags & PSW_MASK_DAT) - __arch_local_irq_stosm(0x04); // enable DAT - if (!irqs_disabled) - trace_hardirqs_on(); - __arch_local_irq_ssm(flags); - return rc; + memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true); + if (!memcpy_real_ptep) + panic("Couldn't setup memcpy real area"); } -/* - * Copy memory in real mode (kernel to kernel) - */ -int memcpy_real(void *dest, unsigned long src, size_t count) +size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count) { - unsigned long _dest = (unsigned long)dest; - unsigned long _src = (unsigned long)src; - unsigned long _count = (unsigned long)count; - int rc; - - if (S390_lowcore.nodat_stack != 0) { - preempt_disable(); - rc = call_on_stack(3, S390_lowcore.nodat_stack, - unsigned long, _memcpy_real, - unsigned long, _dest, - unsigned long, _src, - unsigned long, _count); - preempt_enable(); - return rc; + size_t len, copied, res = 0; + unsigned long phys, offset; + void *chunk; + pte_t pte; + + while (count) { + phys = src & PAGE_MASK; + offset = src & ~PAGE_MASK; + chunk = (void *)(__memcpy_real_area + offset); + len = min(count, PAGE_SIZE - offset); + pte = mk_pte_phys(phys, PAGE_KERNEL_RO); + + mutex_lock(&memcpy_real_mutex); + if (pte_val(pte) != pte_val(*memcpy_real_ptep)) { + __ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL); + set_pte(memcpy_real_ptep, pte); + } + copied = copy_to_iter(chunk, len, iter); + mutex_unlock(&memcpy_real_mutex); + + count -= copied; + src += copied; + res += copied; + if (copied < len) + break; } - /* - * This is a really early memcpy_real call, the stacks are - * not set up yet. Just call _memcpy_real on the early boot - * stack - */ - return _memcpy_real(_dest, _src, _count); + return res; } -/* - * Copy memory in absolute mode (kernel to kernel) - */ -void memcpy_absolute(void *dest, void *src, size_t count) +int memcpy_real(void *dest, unsigned long src, size_t count) { - unsigned long cr0, flags, prefix; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); /* disable lowcore protection */ - prefix = store_prefix(); - if (prefix) { - local_mcck_disable(); - set_prefix(0); - memcpy(dest, src, count); - set_prefix(prefix); - local_mcck_enable(); - } else { - memcpy(dest, src, count); - } - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dest; + kvec.iov_len = count; + iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + if (memcpy_real_iter(&iter, src, count) < count) + return -EFAULT; + return 0; } /* - * Check if physical address is within prefix or zero page + * Find CPU that owns swapped prefix page */ -static int is_swapped(phys_addr_t addr) +static int get_swapped_owner(phys_addr_t addr) { phys_addr_t lc; int cpu; - if (addr < sizeof(struct lowcore)) - return 1; for_each_online_cpu(cpu) { lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; - return 1; + return cpu; } - return 0; + return -1; } /* @@ -200,17 +161,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) { void *ptr = phys_to_virt(addr); void *bounce = ptr; + struct lowcore *abs_lc; + unsigned long flags; unsigned long size; + int this_cpu, cpu; cpus_read_lock(); - preempt_disable(); - if (is_swapped(addr)) { - size = PAGE_SIZE - (addr & ~PAGE_MASK); - bounce = (void *) __get_free_page(GFP_ATOMIC); - if (bounce) - memcpy_absolute(bounce, ptr, size); + this_cpu = get_cpu(); + if (addr >= sizeof(struct lowcore)) { + cpu = get_swapped_owner(addr); + if (cpu < 0) + goto out; + } + bounce = (void *)__get_free_page(GFP_ATOMIC); + if (!bounce) + goto out; + size = PAGE_SIZE - (addr & ~PAGE_MASK); + if (addr < sizeof(struct lowcore)) { + abs_lc = get_abs_lowcore(&flags); + ptr = (void *)abs_lc + addr; + memcpy(bounce, ptr, size); + put_abs_lowcore(abs_lc, flags); + } else if (cpu == this_cpu) { + ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); + memcpy(bounce, ptr, size); + } else { + memcpy(bounce, ptr, size); } - preempt_enable(); +out: + put_cpu(); cpus_read_unlock(); return bounce; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c2583f921ca8..ee1a97078527 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -240,7 +240,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && addr && direct && + MACHINE_HAS_EDAT1 && direct && !debug_pagealloc_enabled()) { set_pmd(pmd, __pmd(__pa(addr) | prot)); pages++; @@ -336,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && addr && direct && + MACHINE_HAS_EDAT2 && direct && !debug_pagealloc_enabled()) { set_pud(pud, __pud(__pa(addr) | prot)); pages++; @@ -561,6 +561,103 @@ int vmem_add_mapping(unsigned long start, unsigned long size) } /* + * Allocate new or return existing page-table entry, but do not map it + * to any physical address. If missing, allocate segment- and region- + * table entries along. Meeting a large segment- or region-table entry + * while traversing is an error, since the function is expected to be + * called against virtual regions reserverd for 4KB mappings only. + */ +pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc) +{ + pte_t *ptep = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + if (!alloc) + goto out; + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) + goto out; + pgd_populate(&init_mm, pgd, p4d); + } + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + if (!alloc) + goto out; + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) + goto out; + p4d_populate(&init_mm, p4d, pud); + } + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + if (!alloc) + goto out; + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); + } else if (WARN_ON_ONCE(pud_large(*pud))) { + goto out; + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + if (!alloc) + goto out; + pte = vmem_pte_alloc(); + if (!pte) + goto out; + pmd_populate(&init_mm, pmd, pte); + } else if (WARN_ON_ONCE(pmd_large(*pmd))) { + goto out; + } + ptep = pte_offset_kernel(pmd, addr); +out: + return ptep; +} + +int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc) +{ + pte_t *ptep, pte; + + if (!IS_ALIGNED(addr, PAGE_SIZE)) + return -EINVAL; + ptep = vmem_get_alloc_pte(addr, alloc); + if (!ptep) + return -ENOMEM; + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte = mk_pte_phys(phys, prot); + set_pte(ptep, pte); + return 0; +} + +int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) +{ + int rc; + + mutex_lock(&vmem_mutex); + rc = __vmem_map_4k_page(addr, phys, prot, true); + mutex_unlock(&vmem_mutex); + return rc; +} + +void vmem_unmap_4k_page(unsigned long addr) +{ + pte_t *ptep; + + mutex_lock(&vmem_mutex); + ptep = virt_to_kpte(addr); + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte_clear(&init_mm, addr, ptep); + mutex_unlock(&vmem_mutex); +} + +/* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug * additional memory segments. @@ -584,6 +681,9 @@ void __init vmem_map_init(void) __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + /* lowcore requires 4k mapping for real addresses / prefixing */ + set_memory_4k(0, LC_PAGES); + /* lowcore must be executable for LPSWE */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); |