diff options
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r-- | arch/arm64/mm/dma-mapping.c | 4 | ||||
-rw-r--r-- | arch/arm64/mm/dump.c | 24 | ||||
-rw-r--r-- | arch/arm64/mm/fault.c | 101 | ||||
-rw-r--r-- | arch/arm64/mm/init.c | 35 | ||||
-rw-r--r-- | arch/arm64/mm/ioremap.c | 4 | ||||
-rw-r--r-- | arch/arm64/mm/kasan_init.c | 9 | ||||
-rw-r--r-- | arch/arm64/mm/mmu.c | 32 | ||||
-rw-r--r-- | arch/arm64/mm/numa.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/pageattr.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/proc.S | 20 |
10 files changed, 167 insertions, 66 deletions
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1d3f0b5a9940..bd2b039f43a6 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -14,9 +14,7 @@ pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { - if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE)) - return pgprot_writecombine(prot); - return prot; + return pgprot_writecombine(prot); } void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 82b3a7fdb4a6..93f9f77582ae 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -25,9 +25,20 @@ #include <asm/pgtable-hwdef.h> #include <asm/ptdump.h> -static const struct addr_marker address_markers[] = { + +enum address_markers_idx { + PAGE_OFFSET_NR = 0, + PAGE_END_NR, #ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "Kasan shadow start" }, + KASAN_START_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + { PAGE_OFFSET, "Linear Mapping start" }, + { 0 /* PAGE_END */, "Linear Mapping end" }, +#ifdef CONFIG_KASAN + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, #endif { MODULES_VADDR, "Modules start" }, @@ -42,7 +53,6 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; @@ -376,7 +386,7 @@ static void ptdump_initialize(void) static struct ptdump_info kernel_ptdump_info = { .mm = &init_mm, .markers = address_markers, - .base_addr = VA_START, + .base_addr = PAGE_OFFSET, }; void ptdump_check_wx(void) @@ -390,7 +400,7 @@ void ptdump_check_wx(void) .check_wx = true, }; - walk_pgd(&st, &init_mm, VA_START); + walk_pgd(&st, &init_mm, PAGE_OFFSET); note_page(&st, 0, 0, 0); if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", @@ -401,6 +411,10 @@ void ptdump_check_wx(void) static int ptdump_init(void) { + address_markers[PAGE_END_NR].start_address = PAGE_END; +#ifdef CONFIG_KASAN + address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; +#endif ptdump_initialize(); ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); return 0; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9568c116ac7f..115d7a0e4b08 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -8,6 +8,7 @@ */ #include <linux/acpi.h> +#include <linux/bitfield.h> #include <linux/extable.h> #include <linux/signal.h> #include <linux/mm.h> @@ -86,8 +87,8 @@ static void mem_abort_decode(unsigned int esr) pr_alert("Mem abort info:\n"); pr_alert(" ESR = 0x%08x\n", esr); - pr_alert(" Exception class = %s, IL = %u bits\n", - esr_get_class_string(esr), + pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n", + ESR_ELx_EC(esr), esr_get_class_string(esr), (esr & ESR_ELx_IL) ? 32 : 16); pr_alert(" SET = %lu, FnV = %lu\n", (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT, @@ -109,7 +110,7 @@ static inline bool is_ttbr0_addr(unsigned long addr) static inline bool is_ttbr1_addr(unsigned long addr) { /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= VA_START; + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; } /* @@ -138,10 +139,9 @@ static void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n", + pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - mm == &init_mm ? VA_BITS : (int)vabits_user, - (unsigned long)virt_to_phys(mm->pgd)); + vabits_actual, (unsigned long)virt_to_phys(mm->pgd)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); @@ -242,6 +242,34 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, return false; } +static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + unsigned long flags; + u64 par, dfsc; + + if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR || + (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) + return false; + + local_irq_save(flags); + asm volatile("at s1e1r, %0" :: "r" (addr)); + isb(); + par = read_sysreg(par_el1); + local_irq_restore(flags); + + if (!(par & SYS_PAR_EL1_F)) + return false; + + /* + * If we got a different type of fault from the AT instruction, + * treat the translation fault as spurious. + */ + dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par); + return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT; +} + static void die_kernel_fault(const char *msg, unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -270,6 +298,10 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; + if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs), + "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr)) + return; + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; @@ -777,6 +809,53 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } +/* + * In debug exception context, we explicitly disable preemption despite + * having interrupts disabled. + * This serves two purposes: it makes it much less likely that we would + * accidentally schedule in exception context and it will force a warning + * if we somehow manage to schedule by accident. + */ +static void debug_exception_enter(struct pt_regs *regs) +{ + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); + + if (user_mode(regs)) { + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + } else { + /* + * We might have interrupted pretty much anything. In + * fact, if we're a debug exception, we can even interrupt + * NMI processing. We don't want this code makes in_nmi() + * to return true, but we need to notify RCU. + */ + rcu_nmi_enter(); + } + + preempt_disable(); + + /* This code is a bit fragile. Test it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); +} +NOKPROBE_SYMBOL(debug_exception_enter); + +static void debug_exception_exit(struct pt_regs *regs) +{ + preempt_enable_no_resched(); + + if (!user_mode(regs)) + rcu_nmi_exit(); + + if (interrupts_enabled(regs)) + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(debug_exception_exit); + #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); @@ -817,12 +896,7 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, if (cortex_a76_erratum_1463225_debug_handler(regs)) return; - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + debug_exception_enter(regs); if (user_mode(regs) && !is_ttbr0_addr(pc)) arm64_apply_bp_hardening(); @@ -832,7 +906,6 @@ asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, inf->sig, inf->code, (void __user *)pc, esr); } - if (interrupts_enabled(regs)) - trace_hardirqs_on(); + debug_exception_exit(regs); } NOKPROBE_SYMBOL(do_debug_exception); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f3c795278def..45c00a54909c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -50,6 +50,12 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); +s64 physvirt_offset __ro_after_init; +EXPORT_SYMBOL(physvirt_offset); + +struct page *vmemmap __ro_after_init; +EXPORT_SYMBOL(vmemmap); + phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -301,7 +307,7 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = -(s64)PAGE_OFFSET; + const s64 linear_region_size = BIT(vabits_actual - 1); /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); @@ -310,18 +316,25 @@ void __init arm64_memblock_init(void) memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); /* - * Ensure that the linear region takes up exactly half of the kernel - * virtual address space. This way, we can distinguish a linear address - * from a kernel/module/vmalloc address by testing a single bit. - */ - BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); - - /* * Select a suitable value for the base of physical memory. */ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); + physvirt_offset = PHYS_OFFSET - PAGE_OFFSET; + + vmemmap = ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)); + + /* + * If we are running with a 52-bit kernel VA config on a system that + * does not support it, we have to offset our vmemmap and physvirt_offset + * s.t. we avoid the 52-bit portion of the direct linear map + */ + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) { + vmemmap += (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT; + physvirt_offset = PHYS_OFFSET - _PAGE_OFFSET(48); + } + /* * Remove the memory that we will not be able to cover with the * linear mapping. Take care not to clip the kernel which may be @@ -570,8 +583,12 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { + unsigned long aligned_start, aligned_end; + + aligned_start = __virt_to_phys(start) & PAGE_MASK; + aligned_end = PAGE_ALIGN(__virt_to_phys(end)); + memblock_free(aligned_start, aligned_end - aligned_start); free_reserved_area((void *)start, (void *)end, 0, "initrd"); - memblock_free(__virt_to_phys(start), end - start); } #endif diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index fdb595a5d65f..9be71bee902c 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -69,7 +69,7 @@ void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) } EXPORT_SYMBOL(__ioremap); -void __iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *io_addr) { unsigned long addr = (unsigned long)io_addr & PAGE_MASK; @@ -80,7 +80,7 @@ void __iounmap(volatile void __iomem *io_addr) if (is_vmalloc_addr((void *)addr)) vunmap((void *)addr); } -EXPORT_SYMBOL(__iounmap); +EXPORT_SYMBOL(iounmap); void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) { diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6cf97b904ebb..f87a32484ea8 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -156,7 +156,8 @@ asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); @@ -225,10 +226,10 @@ void __init kasan_init(void) kasan_map_populate(kimg_shadow_start, kimg_shadow_end, early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)mod_shadow_start); + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), + (void *)mod_shadow_start); kasan_populate_early_shadow((void *)kimg_shadow_end, - kasan_mem_to_shadow((void *)PAGE_OFFSET)); + (void *)KASAN_SHADOW_END); if (kimg_shadow_start > mod_shadow_end) kasan_populate_early_shadow((void *)mod_shadow_end, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 750a69dde39b..53dc6f24cfb7 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,8 +40,9 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; -u64 vabits_user __ro_after_init; -EXPORT_SYMBOL(vabits_user); + +u64 __section(".mmuoff.data.write") vabits_actual; +EXPORT_SYMBOL(vabits_actual); u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -398,7 +399,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -425,7 +426,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -646,6 +647,8 @@ static void __init map_kernel(pgd_t *pgdp) set_pgd(pgd_offset_raw(pgdp, FIXADDR_START), READ_ONCE(*pgd_offset_k(FIXADDR_START))); } else if (CONFIG_PGTABLE_LEVELS > 3) { + pgd_t *bm_pgdp; + pud_t *bm_pudp; /* * The fixmap shares its top level pgd entry with the kernel * mapping. This can really only occur when we are running @@ -653,9 +656,9 @@ static void __init map_kernel(pgd_t *pgdp) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pud_populate(&init_mm, - pud_set_fixmap_offset(pgdp, FIXADDR_START), - lm_alias(bm_pmd)); + bm_pgdp = pgd_offset_raw(pgdp, FIXADDR_START); + bm_pudp = pud_set_fixmap_offset(bm_pgdp, FIXADDR_START); + pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -876,7 +879,7 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) +void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); int offset; @@ -929,19 +932,6 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) -{ - void *dt_virt; - int size; - - dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); - if (!dt_virt) - return NULL; - - memblock_reserve(dt_phys, size); - return dt_virt; -} - int __init arch_ioremap_p4d_supported(void) { return 0; diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 4f241cc7cc3b..4decf1659700 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -29,7 +29,7 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) + if (str_has_prefix(opt, "off")) numa_off = true; return 0; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 03c53f16ee77..9ce7bd9d4d9c 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -128,7 +128,6 @@ int set_memory_nx(unsigned long addr, int numpages) __pgprot(PTE_PXN), __pgprot(0)); } -EXPORT_SYMBOL_GPL(set_memory_nx); int set_memory_x(unsigned long addr, int numpages) { @@ -136,7 +135,6 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(0), __pgprot(PTE_PXN)); } -EXPORT_SYMBOL_GPL(set_memory_x); int set_memory_valid(unsigned long addr, int numpages, int enable) { diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7dbf2be470f6..a1e0592d1fbc 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -168,7 +168,7 @@ ENDPROC(cpu_do_switch_mm) .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, empty_zero_page phys_to_ttbr \tmp2, \tmp1 - offset_ttbr1 \tmp2 + offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 isb tlbi vmalle1 @@ -187,7 +187,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - offset_ttbr1 x0 + offset_ttbr1 x0, x3 msr ttbr1_el1, x0 isb @@ -286,6 +286,15 @@ skip_pgd: msr sctlr_el1, x18 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb + /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] ret @@ -362,7 +371,7 @@ __idmap_kpti_secondary: cbnz w18, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb + offset_ttbr1 swapper_ttb, x18 msr ttbr1_el1, swapper_ttb isb ret @@ -438,10 +447,11 @@ ENTRY(__cpu_setup) TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS tcr_clear_errata_bits x10, x9, x5 -#ifdef CONFIG_ARM64_USER_VA_BITS_52 - ldr_l x9, vabits_user +#ifdef CONFIG_ARM64_VA_BITS_52 + ldr_l x9, vabits_actual sub x9, xzr, x9 add x9, x9, #64 + tcr_set_t1sz x10, x9 #else ldr_l x9, idmap_t0sz #endif |