diff options
Diffstat (limited to 'arch/riscv/mm/init.c')
-rw-r--r-- | arch/riscv/mm/init.c | 646 |
1 files changed, 517 insertions, 129 deletions
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 24b2b8044602..50a1b6edd491 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -37,13 +37,21 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +#ifdef CONFIG_64BIT +u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39; +#else +u64 satp_mode __ro_after_init = SATP_MODE_32; +#endif +EXPORT_SYMBOL(satp_mode); + +bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +EXPORT_SYMBOL(pgtable_l4_enabled); +EXPORT_SYMBOL(pgtable_l5_enabled); + phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); -#ifdef CONFIG_XIP_KERNEL -extern char _xiprom[], _exiprom[], __data_loc; -#endif - unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -53,15 +61,6 @@ extern char _start[]; void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; -struct pt_alloc_ops { - pte_t *(*get_pte_virt)(phys_addr_t pa); - phys_addr_t (*alloc_pte)(uintptr_t va); -#ifndef __PAGETABLE_PMD_FOLDED - pmd_t *(*get_pmd_virt)(phys_addr_t pa); - phys_addr_t (*alloc_pmd)(uintptr_t va); -#endif -}; - static phys_addr_t dma32_phys_limit __initdata; static void __init zone_sizes_init(void) @@ -77,35 +76,79 @@ static void __init zone_sizes_init(void) } #if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM) + +#define LOG2_SZ_1K ilog2(SZ_1K) +#define LOG2_SZ_1M ilog2(SZ_1M) +#define LOG2_SZ_1G ilog2(SZ_1G) +#define LOG2_SZ_1T ilog2(SZ_1T) + static inline void print_mlk(char *name, unsigned long b, unsigned long t) { pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld kB)\n", name, b, t, - (((t) - (b)) >> 10)); + (((t) - (b)) >> LOG2_SZ_1K)); } static inline void print_mlm(char *name, unsigned long b, unsigned long t) { pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld MB)\n", name, b, t, - (((t) - (b)) >> 20)); + (((t) - (b)) >> LOG2_SZ_1M)); +} + +static inline void print_mlg(char *name, unsigned long b, unsigned long t) +{ + pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld GB)\n", name, b, t, + (((t) - (b)) >> LOG2_SZ_1G)); +} + +#ifdef CONFIG_64BIT +static inline void print_mlt(char *name, unsigned long b, unsigned long t) +{ + pr_notice("%12s : 0x%08lx - 0x%08lx (%4ld TB)\n", name, b, t, + (((t) - (b)) >> LOG2_SZ_1T)); +} +#else +#define print_mlt(n, b, t) do {} while (0) +#endif + +static inline void print_ml(char *name, unsigned long b, unsigned long t) +{ + unsigned long diff = t - b; + + if (IS_ENABLED(CONFIG_64BIT) && (diff >> LOG2_SZ_1T) >= 10) + print_mlt(name, b, t); + else if ((diff >> LOG2_SZ_1G) >= 10) + print_mlg(name, b, t); + else if ((diff >> LOG2_SZ_1M) >= 10) + print_mlm(name, b, t); + else + print_mlk(name, b, t); } static void __init print_vm_layout(void) { pr_notice("Virtual kernel memory layout:\n"); - print_mlk("fixmap", (unsigned long)FIXADDR_START, - (unsigned long)FIXADDR_TOP); - print_mlm("pci io", (unsigned long)PCI_IO_START, - (unsigned long)PCI_IO_END); - print_mlm("vmemmap", (unsigned long)VMEMMAP_START, - (unsigned long)VMEMMAP_END); - print_mlm("vmalloc", (unsigned long)VMALLOC_START, - (unsigned long)VMALLOC_END); - print_mlm("lowmem", (unsigned long)PAGE_OFFSET, - (unsigned long)high_memory); + print_ml("fixmap", (unsigned long)FIXADDR_START, + (unsigned long)FIXADDR_TOP); + print_ml("pci io", (unsigned long)PCI_IO_START, + (unsigned long)PCI_IO_END); + print_ml("vmemmap", (unsigned long)VMEMMAP_START, + (unsigned long)VMEMMAP_END); + print_ml("vmalloc", (unsigned long)VMALLOC_START, + (unsigned long)VMALLOC_END); #ifdef CONFIG_64BIT - print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, - (unsigned long)ADDRESS_SPACE_END); + print_ml("modules", (unsigned long)MODULES_VADDR, + (unsigned long)MODULES_END); +#endif + print_ml("lowmem", (unsigned long)PAGE_OFFSET, + (unsigned long)high_memory); + if (IS_ENABLED(CONFIG_64BIT)) { +#ifdef CONFIG_KASAN + print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); #endif + + print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR, + (unsigned long)ADDRESS_SPACE_END); + } } #else static void print_vm_layout(void) { } @@ -117,31 +160,14 @@ void __init mem_init(void) BUG_ON(!mem_map); #endif /* CONFIG_FLATMEM */ -#ifdef CONFIG_SWIOTLB - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(dma32_phys_limit)) - swiotlb_init(1); - else - swiotlb_force = SWIOTLB_NO_FORCE; -#endif - high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); + swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); memblock_free_all(); print_vm_layout(); } -/* - * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, - * whereas for 64-bit kernel, the end of the virtual address space is occupied - * by the modules/BPF/kernel mappings which reduces the available size of the - * linear mapping. - * Limit the memory size via mem. - */ -#ifdef CONFIG_64BIT -static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; -#else -static phys_addr_t memory_limit = -PAGE_OFFSET; -#endif +/* Limit the memory size via mem. */ +static phys_addr_t memory_limit; static int __init early_mem(char *p) { @@ -162,35 +188,31 @@ early_param("mem", early_mem); static void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); - phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t phys_ram_end; + phys_addr_t max_mapped_addr; + phys_addr_t phys_ram_end, vmlinux_start; -#ifdef CONFIG_XIP_KERNEL - vmlinux_start = __pa_symbol(&_sdata); -#endif + if (IS_ENABLED(CONFIG_XIP_KERNEL)) + vmlinux_start = __pa_symbol(&_sdata); + else + vmlinux_start = __pa_symbol(&_start); memblock_enforce_memory_limit(memory_limit); /* - * Reserve from the start of the kernel to the end of the kernel - */ -#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX) - /* * Make sure we align the reservation on PMD_SIZE since we will * map the kernel in the linear mapping as read-only: we do not want * any allocation to happen between _end and the next pmd aligned page. */ - vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; -#endif + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; + /* + * Reserve from the start of the kernel to the end of the kernel + */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); -#ifndef CONFIG_64BIT -#ifndef CONFIG_XIP_KERNEL - phys_ram_base = memblock_start_of_DRAM(); -#endif + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) + phys_ram_base = memblock_start_of_DRAM(); /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -200,13 +222,15 @@ static void __init setup_bootmem(void) * address space is occupied by the kernel mapping then this check must * be done as soon as the kernel mapping base address is determined. */ - max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (phys_ram_end - 1)) - memblock_set_current_limit(max_mapped_addr - 4096); -#endif + if (!IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(~(ulong)0); + if (max_mapped_addr == (phys_ram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + } min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); + high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -218,10 +242,26 @@ static void __init setup_bootmem(void) * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { + /* + * In case the DTB is not located in a memory region we won't + * be able to locate it later on via the linear mapping and + * get a segfault when accessing it via __va(dtb_early_pa). + * To avoid this situation copy DTB to a memory region. + * Note that memblock_phys_alloc will also reserve DTB region. + */ + if (!memblock_is_memory(dtb_early_pa)) { + size_t fdt_size = fdt_totalsize(dtb_early_va); + phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); + void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); + + memcpy(new_dtb_early_va, dtb_early_va, fdt_size); + early_memunmap(new_dtb_early_va, fdt_size); + _dtb_early_pa = new_dtb_early_pa; + } else + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); + } - early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); @@ -229,13 +269,7 @@ static void __init setup_bootmem(void) } #ifdef CONFIG_MMU -static struct pt_alloc_ops _pt_ops __initdata; - -#ifdef CONFIG_XIP_KERNEL -#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops)) -#else -#define pt_ops _pt_ops -#endif +struct pt_alloc_ops pt_ops __initdata; unsigned long riscv_pfn_base __ro_after_init; EXPORT_SYMBOL(riscv_pfn_base); @@ -245,14 +279,38 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); +static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL +#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) +#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) #endif /* CONFIG_XIP_KERNEL */ +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READ, + [VM_WRITE] = PAGE_COPY, + [VM_WRITE | VM_READ] = PAGE_COPY, + [VM_EXEC] = PAGE_EXEC, + [VM_EXEC | VM_READ] = PAGE_READ_EXEC, + [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READ, + [VM_SHARED | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, + [VM_SHARED | VM_EXEC] = PAGE_EXEC, + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READ_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_EXEC +}; +DECLARE_VM_GET_PAGE_PROT + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long addr = __fix_to_virt(idx); @@ -333,6 +391,26 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) #endif /* CONFIG_XIP_KERNEL */ +static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss; +static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d)) +#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d)) +#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d)) +#endif /* CONFIG_XIP_KERNEL */ + +static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; +static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; +static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud)) +#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud)) +#define early_pud ((pud_t *)XIP_FIXUP(early_pud)) +#endif /* CONFIG_XIP_KERNEL */ + static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { /* Before MMU is enabled */ @@ -352,7 +430,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); return (uintptr_t)early_pmd; } @@ -367,7 +445,8 @@ static phys_addr_t __init alloc_pmd_late(uintptr_t va) unsigned long vaddr; vaddr = __get_free_page(GFP_KERNEL); - BUG_ON(!vaddr); + BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr))); + return __pa(vaddr); } @@ -398,21 +477,170 @@ static void __init create_pmd_mapping(pmd_t *pmdp, create_pte_mapping(ptep, va, pa, sz, prot); } -#define pgd_next_t pmd_t -#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va) -#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa) +static pud_t *__init get_pud_virt_early(phys_addr_t pa) +{ + return (pud_t *)((uintptr_t)pa); +} + +static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_PUD); + return (pud_t *)set_fixmap_offset(FIX_PUD, pa); +} + +static pud_t *__init get_pud_virt_late(phys_addr_t pa) +{ + return (pud_t *)__va(pa); +} + +static phys_addr_t __init alloc_pud_early(uintptr_t va) +{ + /* Only one PUD is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_pud; +} + +static phys_addr_t __init alloc_pud_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_pud_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + +static p4d_t *__init get_p4d_virt_early(phys_addr_t pa) +{ + return (p4d_t *)((uintptr_t)pa); +} + +static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_P4D); + return (p4d_t *)set_fixmap_offset(FIX_P4D, pa); +} + +static p4d_t *__init get_p4d_virt_late(phys_addr_t pa) +{ + return (p4d_t *)__va(pa); +} + +static phys_addr_t __init alloc_p4d_early(uintptr_t va) +{ + /* Only one P4D is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_p4d; +} + +static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_p4d_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + +static void __init create_pud_mapping(pud_t *pudp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pmd_t *nextp; + phys_addr_t next_phys; + uintptr_t pud_index = pud_index(va); + + if (sz == PUD_SIZE) { + if (pud_val(pudp[pud_index]) == 0) + pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot); + return; + } + + if (pud_val(pudp[pud_index]) == 0) { + next_phys = pt_ops.alloc_pmd(va); + pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pmd_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); + nextp = pt_ops.get_pmd_virt(next_phys); + } + + create_pmd_mapping(nextp, va, pa, sz, prot); +} + +static void __init create_p4d_mapping(p4d_t *p4dp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pud_t *nextp; + phys_addr_t next_phys; + uintptr_t p4d_index = p4d_index(va); + + if (sz == P4D_SIZE) { + if (p4d_val(p4dp[p4d_index]) == 0) + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot); + return; + } + + if (p4d_val(p4dp[p4d_index]) == 0) { + next_phys = pt_ops.alloc_pud(va); + p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pud_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index])); + nextp = pt_ops.get_pud_virt(next_phys); + } + + create_pud_mapping(nextp, va, pa, sz, prot); +} + +#define pgd_next_t p4d_t +#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \ + pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \ + pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))) +#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \ + pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \ + pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa))) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ - create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) -#define fixmap_pgd_next fixmap_pmd + (pgtable_l5_enabled ? \ + create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \ + (pgtable_l4_enabled ? \ + create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \ + create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))) +#define fixmap_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)) +#define trampoline_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) +#define early_dtb_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) #define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) -#define fixmap_pgd_next fixmap_pte -#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) -#endif +#define fixmap_pgd_next ((uintptr_t)fixmap_pte) +#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) +#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +#endif /* __PAGETABLE_PMD_FOLDED */ void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, @@ -451,6 +679,9 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) } #ifdef CONFIG_XIP_KERNEL +#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base)) +extern char _xiprom[], _exiprom[], __data_loc; + /* called from head.S with MMU off */ asmlinkage void __init __copy_data(void) { @@ -499,6 +730,77 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) } #endif /* CONFIG_STRICT_KERNEL_RWX */ +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) +static void __init disable_pgtable_l5(void) +{ + pgtable_l5_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L4; + satp_mode = SATP_MODE_48; +} + +static void __init disable_pgtable_l4(void) +{ + pgtable_l4_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L3; + satp_mode = SATP_MODE_39; +} + +/* + * There is a simple way to determine if 4-level is supported by the + * underlying hardware: establish 1:1 mapping in 4-level page table mode + * then read SATP to see if the configuration was taken into account + * meaning sv48 is supported. + */ +static __init void set_satp_mode(void) +{ + u64 identity_satp, hw_satp; + uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; + bool check_l4 = false; + + create_p4d_mapping(early_p4d, + set_satp_mode_pmd, (uintptr_t)early_pud, + P4D_SIZE, PAGE_TABLE); + create_pud_mapping(early_pud, + set_satp_mode_pmd, (uintptr_t)early_pmd, + PUD_SIZE, PAGE_TABLE); + /* Handle the case where set_satp_mode straddles 2 PMDs */ + create_pmd_mapping(early_pmd, + set_satp_mode_pmd, set_satp_mode_pmd, + PMD_SIZE, PAGE_KERNEL_EXEC); + create_pmd_mapping(early_pmd, + set_satp_mode_pmd + PMD_SIZE, + set_satp_mode_pmd + PMD_SIZE, + PMD_SIZE, PAGE_KERNEL_EXEC); +retry: + create_pgd_mapping(early_pg_dir, + set_satp_mode_pmd, + check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d, + PGDIR_SIZE, PAGE_TABLE); + + identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; + + local_flush_tlb_all(); + csr_write(CSR_SATP, identity_satp); + hw_satp = csr_swap(CSR_SATP, 0ULL); + local_flush_tlb_all(); + + if (hw_satp != identity_satp) { + if (!check_l4) { + disable_pgtable_l5(); + check_l4 = true; + memset(early_pg_dir, 0, PAGE_SIZE); + goto retry; + } + disable_pgtable_l4(); + } + + memset(early_pg_dir, 0, PAGE_SIZE); + memset(early_p4d, 0, PAGE_SIZE); + memset(early_pud, 0, PAGE_SIZE); + memset(early_pmd, 0, PAGE_SIZE); +} +#endif + /* * setup_vm() is called from head.S with MMU-off. * @@ -563,10 +865,18 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa, + IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, PGDIR_SIZE, IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); + if (pgtable_l5_enabled) + create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); + + if (pgtable_l4_enabled) + create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); + if (IS_ENABLED(CONFIG_64BIT)) { create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, pa, PMD_SIZE, PAGE_KERNEL); @@ -588,11 +898,70 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) dtb_early_pa = dtb_pa; } +/* + * MMU is not enabled, the page tables are allocated directly using + * early_pmd/pud/p4d and the address returned is the physical one. + */ +static void __init pt_ops_set_early(void) +{ + pt_ops.alloc_pte = alloc_pte_early; + pt_ops.get_pte_virt = get_pte_virt_early; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_early; + pt_ops.get_pmd_virt = get_pmd_virt_early; + pt_ops.alloc_pud = alloc_pud_early; + pt_ops.get_pud_virt = get_pud_virt_early; + pt_ops.alloc_p4d = alloc_p4d_early; + pt_ops.get_p4d_virt = get_p4d_virt_early; +#endif +} + +/* + * MMU is enabled but page table setup is not complete yet. + * fixmap page table alloc functions must be used as a means to temporarily + * map the allocated physical pages since the linear mapping does not exist yet. + * + * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va, + * but it will be used as described above. + */ +static void __init pt_ops_set_fixmap(void) +{ + pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); + pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); + pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); + pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); +#endif +} + +/* + * MMU is enabled and page table setup is complete, so from now, we can use + * generic page allocation functions to setup page table. + */ +static void __init pt_ops_set_late(void) +{ + pt_ops.alloc_pte = alloc_pte_late; + pt_ops.get_pte_virt = get_pte_virt_late; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_late; + pt_ops.get_pmd_virt = get_pmd_virt_late; + pt_ops.alloc_pud = alloc_pud_late; + pt_ops.get_pud_virt = get_pud_virt_late; + pt_ops.alloc_p4d = alloc_p4d_late; + pt_ops.get_p4d_virt = get_p4d_virt_late; +#endif +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; + kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); #ifdef CONFIG_XIP_KERNEL kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; @@ -607,11 +976,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; #endif + +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) + set_satp_mode(); +#endif + kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); + /* + * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit + * kernel, whereas for 64-bit kernel, the end of the virtual address + * space is occupied by the modules/BPF/kernel mappings which reduces + * the available size of the linear mapping. + */ + memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0); + /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0); @@ -624,23 +1006,33 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); #endif - pt_ops.alloc_pte = alloc_pte_early; - pt_ops.get_pte_virt = get_pte_virt_early; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_early; - pt_ops.get_pmd_virt = get_pmd_virt_early; -#endif + apply_early_boot_alternatives(); + pt_ops_set_early(); + /* Setup early PGD for fixmap */ create_pgd_mapping(early_pg_dir, FIXADDR_START, - (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); + fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED - /* Setup fixmap PMD */ + /* Setup fixmap P4D and PUD */ + if (pgtable_l5_enabled) + create_p4d_mapping(fixmap_p4d, FIXADDR_START, + (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE); + /* Setup fixmap PUD and PMD */ + if (pgtable_l4_enabled) + create_pud_mapping(fixmap_pud, FIXADDR_START, + (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE); create_pmd_mapping(fixmap_pmd, FIXADDR_START, (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); /* Setup trampoline PGD and PMD */ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, - (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + if (pgtable_l5_enabled) + create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr, + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE); + if (pgtable_l4_enabled) + create_pud_mapping(trampoline_pud, kernel_map.virt_addr, + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); #ifdef CONFIG_XIP_KERNEL create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr, kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); @@ -668,7 +1060,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap * range can not span multiple pmds. */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); #ifndef __PAGETABLE_PMD_FOLDED @@ -693,6 +1085,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); } #endif + + pt_ops_set_fixmap(); } static void __init setup_vm_final(void) @@ -701,16 +1095,6 @@ static void __init setup_vm_final(void) phys_addr_t pa, start, end; u64 i; - /** - * MMU is enabled at this point. But page table setup is not complete yet. - * fixmap page table alloc functions should be used at this point - */ - pt_ops.alloc_pte = alloc_pte_fixmap; - pt_ops.get_pte_virt = get_pte_virt_fixmap; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_fixmap; - pt_ops.get_pmd_virt = get_pmd_virt_fixmap; -#endif /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, __pa_symbol(fixmap_pgd_next), @@ -735,26 +1119,25 @@ static void __init setup_vm_final(void) } } -#ifdef CONFIG_64BIT /* Map the kernel */ - create_kernel_page_table(swapper_pg_dir, false); + if (IS_ENABLED(CONFIG_64BIT)) + create_kernel_page_table(swapper_pg_dir, false); + +#ifdef CONFIG_KASAN + kasan_swapper_init(); #endif /* Clear fixmap PTE and PMD mappings */ clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); + clear_fixmap(FIX_PUD); + clear_fixmap(FIX_P4D); /* Move to swapper page table */ - csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); local_flush_tlb_all(); - /* generic page allocation functions must be used to setup page table */ - pt_ops.alloc_pte = alloc_pte_late; - pt_ops.get_pte_virt = get_pte_virt_late; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_late; - pt_ops.get_pmd_virt = get_pmd_virt_late; -#endif + pt_ops_set_late(); } #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) @@ -768,7 +1151,6 @@ static inline void setup_vm_final(void) } #endif /* CONFIG_MMU */ -#ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -785,17 +1167,17 @@ static void __init reserve_crashkernel(void) int ret = 0; + if (!IS_ENABLED(CONFIG_KEXEC_CORE)) + return; /* * Don't reserve a region for a crash kernel on a crash kernel * since it doesn't make much sense and we have limited memory * resources. */ -#ifdef CONFIG_CRASH_DUMP if (is_kdump_kernel()) { pr_info("crashkernel: ignoring reservation request\n"); return; } -#endif ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); @@ -812,13 +1194,22 @@ static void __init reserve_crashkernel(void) /* * Current riscv boot protocol requires 2MB alignment for * RV64 and 4MB alignment for RV32 (hugepage size) + * + * Try to alloc from 32bit addressible physical memory so that + * swiotlb can work on the crash kernel. */ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, - search_start, search_end); + search_start, + min(search_end, (unsigned long) SZ_4G)); if (crash_base == 0) { - pr_warn("crashkernel: couldn't allocate %lldKB\n", - crash_size >> 10); - return; + /* Try again without restricting region to 32bit addressible memory */ + crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, + search_start, search_end); + if (crash_base == 0) { + pr_warn("crashkernel: couldn't allocate %lldKB\n", + crash_size >> 10); + return; + } } pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n", @@ -827,7 +1218,6 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } -#endif /* CONFIG_KEXEC_CORE */ void __init paging_init(void) { @@ -841,9 +1231,7 @@ void __init misc_mem_init(void) arch_numa_init(); sparse_init(); zone_sizes_init(); -#ifdef CONFIG_KEXEC_CORE reserve_crashkernel(); -#endif memblock_dump_all(); } |