diff options
Diffstat (limited to 'arch/arm64/mm/hugetlbpage.c')
-rw-r--r-- | arch/arm64/mm/hugetlbpage.c | 283 |
1 files changed, 180 insertions, 103 deletions
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index bbeb6a5a6ba6..35e9a468d13e 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -17,26 +17,73 @@ #include <asm/mman.h> #include <asm/tlb.h> #include <asm/tlbflush.h> -#include <asm/pgalloc.h> -#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION -bool arch_hugetlb_migration_supported(struct hstate *h) +/* + * HugeTLB Support Matrix + * + * --------------------------------------------------- + * | Page Size | CONT PTE | PMD | CONT PMD | PUD | + * --------------------------------------------------- + * | 4K | 64K | 2M | 32M | 1G | + * | 16K | 2M | 32M | 1G | | + * | 64K | 2M | 512M | 16G | | + * --------------------------------------------------- + */ + +/* + * Reserve CMA areas for the largest supported gigantic + * huge page when requested. Any other smaller gigantic + * huge pages could still be served from those areas. + */ +#ifdef CONFIG_CMA +void __init arm64_hugetlb_cma_reserve(void) { - size_t pagesize = huge_page_size(h); + int order; + + if (pud_sect_supported()) + order = PUD_SHIFT - PAGE_SHIFT; + else + order = CONT_PMD_SHIFT - PAGE_SHIFT; - switch (pagesize) { -#ifdef CONFIG_ARM64_4K_PAGES + /* + * HugeTLB CMA reservation is required for gigantic + * huge pages which could not be allocated via the + * page allocator. Just warn if there is any change + * breaking this assumption. + */ + WARN_ON(order <= MAX_ORDER); + hugetlb_cma_reserve(order); +} +#endif /* CONFIG_CMA */ + +static bool __hugetlb_valid_size(unsigned long size) +{ + switch (size) { +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif - case PMD_SIZE: case CONT_PMD_SIZE: + case PMD_SIZE: case CONT_PTE_SIZE: return true; } - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); + return false; } + +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION +bool arch_hugetlb_migration_supported(struct hstate *h) +{ + size_t pagesize = huge_page_size(h); + + if (!__hugetlb_valid_size(pagesize)) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + return false; + } + return true; +} #endif int pmd_huge(pmd_t pmd) @@ -53,25 +100,17 @@ int pud_huge(pud_t pud) #endif } -/* - * Select all bits except the pfn - */ -static inline pgprot_t pte_pgprot(pte_t pte) -{ - unsigned long pfn = pte_pfn(pte); - - return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); -} - static int find_num_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, size_t *pgsize) { pgd_t *pgdp = pgd_offset(mm, addr); + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; *pgsize = PAGE_SIZE; - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); pmdp = pmd_offset(pudp, addr); if ((pte_t *)pmdp == ptep) { *pgsize = PMD_SIZE; @@ -87,8 +126,11 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = size; switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + if (pud_sect_supported()) + contig_ptes = 1; + break; #endif case PMD_SIZE: contig_ptes = 1; @@ -106,6 +148,28 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) return contig_ptes; } +pte_t huge_ptep_get(pte_t *ptep) +{ + int ncontig, i; + size_t pgsize; + pte_t orig_pte = ptep_get(ptep); + + if (!pte_present(orig_pte) || !pte_cont(orig_pte)) + return orig_pte; + + ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize); + for (i = 0; i < ncontig; i++, ptep++) { + pte_t pte = ptep_get(ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + return orig_pte; +} + /* * Changing some bits of contiguous entries requires us to follow a * Break-Before-Make approach, breaking the whole contiguous set @@ -114,15 +178,14 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) * * This helper performs the break step. */ -static pte_t get_clear_flush(struct mm_struct *mm, +static pte_t get_clear_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pgsize, unsigned long ncontig) { - pte_t orig_pte = huge_ptep_get(ptep); - bool valid = pte_valid(orig_pte); - unsigned long i, saddr = addr; + pte_t orig_pte = ptep_get(ptep); + unsigned long i; for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { pte_t pte = ptep_get_and_clear(mm, addr, ptep); @@ -138,11 +201,19 @@ static pte_t get_clear_flush(struct mm_struct *mm, if (pte_young(pte)) orig_pte = pte_mkyoung(orig_pte); } + return orig_pte; +} - if (valid) { - struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); - flush_tlb_range(&vma, saddr, addr); - } +static pte_t get_clear_contig_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + + flush_tlb_range(&vma, addr, addr + (pgsize * ncontig)); return orig_pte; } @@ -170,6 +241,13 @@ static void clear_flush(struct mm_struct *mm, flush_tlb_range(&vma, saddr, addr); } +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry) +{ + VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry)); + + return page_folio(pfn_to_page(swp_offset_pfn(entry))); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -179,11 +257,16 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long pfn, dpfn; pgprot_t hugeprot; - /* - * Code needs to be expanded to handle huge swap and migration - * entries. Needed for HUGETLB and MEMORY_FAILURE. - */ - WARN_ON(!pte_present(pte)); + if (!pte_present(pte)) { + struct folio *folio; + + folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte)); + ncontig = num_contig_ptes(folio_size(folio), &pgsize); + + for (i = 0; i < ncontig; i++, ptep++) + set_pte_at(mm, addr, ptep, pte); + return; + } if (!pte_cont(pte)) { set_pte_at(mm, addr, ptep, pte); @@ -201,28 +284,18 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); } -void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte, unsigned long sz) -{ - int i, ncontig; - size_t pgsize; - - ncontig = num_contig_ptes(sz, &pgsize); - - for (i = 0; i < ncontig; i++, ptep++) - set_pte(ptep, pte); -} - -pte_t *huge_pte_alloc(struct mm_struct *mm, +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep = NULL; pgdp = pgd_offset(mm, addr); - pudp = pud_alloc(mm, pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + pudp = pud_alloc(mm, p4dp, addr); if (!pudp) return NULL; @@ -230,6 +303,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = (pte_t *)pudp; } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); + if (!pmdp) + return NULL; WARN_ON(addr & (sz - 1)); /* @@ -241,9 +316,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, */ ptep = pte_alloc_map(mm, pmdp, addr); } else if (sz == PMD_SIZE) { - if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && - pud_none(READ_ONCE(*pudp))) - ptep = huge_pmd_share(mm, addr, pudp); + if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp))) + ptep = huge_pmd_share(mm, vma, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); } else if (sz == (CONT_PMD_SIZE)) { @@ -259,6 +333,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp, pud; pmd_t *pmdp, pmd; @@ -266,7 +341,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (!pgd_present(READ_ONCE(*pgdp))) return NULL; - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + if (!p4d_present(READ_ONCE(*p4dp))) + return NULL; + + pudp = pud_offset(p4dp, addr); pud = READ_ONCE(*pudp); if (sz != PUD_SIZE && pud_none(pud)) return NULL; @@ -292,11 +371,33 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; } -pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, - struct page *page, int writable) +unsigned long hugetlb_mask_last_page(struct hstate *h) +{ + unsigned long hp_size = huge_page_size(h); + + switch (hp_size) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + return PGDIR_SIZE - PUD_SIZE; +#endif + case CONT_PMD_SIZE: + return PUD_SIZE - CONT_PMD_SIZE; + case PMD_SIZE: + return PUD_SIZE - PMD_SIZE; + case CONT_PTE_SIZE: + return PMD_SIZE - CONT_PTE_SIZE; + default: + break; + } + + return 0UL; +} + +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) { - size_t pagesize = huge_page_size(hstate_vma(vma)); + size_t pagesize = 1UL << shift; + entry = pte_mkhuge(entry); if (pagesize == CONT_PTE_SIZE) { entry = pte_mkcont(entry); } else if (pagesize == CONT_PMD_SIZE) { @@ -325,14 +426,14 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, { int ncontig; size_t pgsize; - pte_t orig_pte = huge_ptep_get(ptep); + pte_t orig_pte = ptep_get(ptep); if (!pte_cont(orig_pte)) return ptep_get_and_clear(mm, addr, ptep); ncontig = find_num_contig(mm, addr, ptep, &pgsize); - return get_clear_flush(mm, addr, ptep, pgsize, ncontig); + return get_clear_contig(mm, addr, ptep, pgsize, ncontig); } /* @@ -348,11 +449,11 @@ static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) { int i; - if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + if (pte_write(pte) != pte_write(ptep_get(ptep))) return 1; for (i = 0; i < ncontig; i++) { - pte_t orig_pte = huge_ptep_get(ptep + i); + pte_t orig_pte = ptep_get(ptep + i); if (pte_dirty(pte) != pte_dirty(orig_pte)) return 1; @@ -371,19 +472,20 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; + struct mm_struct *mm = vma->vm_mm; pgprot_t hugeprot; pte_t orig_pte; if (!pte_cont(pte)) return ptep_set_access_flags(vma, addr, ptep, pte, dirty); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); + ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; if (!__cont_access_flags_changed(ptep, pte, ncontig)) return 0; - orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -394,7 +496,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) - set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); return 1; } @@ -416,7 +518,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; - pte = get_clear_flush(mm, addr, ptep, pgsize, ncontig); + pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); pte = pte_wrprotect(pte); hugeprot = pte_pgprot(pte); @@ -426,59 +528,34 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); } -void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { + struct mm_struct *mm = vma->vm_mm; size_t pgsize; int ncontig; - if (!pte_cont(READ_ONCE(*ptep))) { - ptep_clear_flush(vma, addr, ptep); - return; - } + if (!pte_cont(READ_ONCE(*ptep))) + return ptep_clear_flush(vma, addr, ptep); - ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); - clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); -} - -static void __init add_huge_page_size(unsigned long size) -{ - if (size_to_hstate(size)) - return; - - hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT); + ncontig = find_num_contig(mm, addr, ptep, &pgsize); + return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig); } static int __init hugetlbpage_init(void) { -#ifdef CONFIG_ARM64_4K_PAGES - add_huge_page_size(PUD_SIZE); -#endif - add_huge_page_size(CONT_PMD_SIZE); - add_huge_page_size(PMD_SIZE); - add_huge_page_size(CONT_PTE_SIZE); + if (pud_sect_supported()) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + + hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); return 0; } arch_initcall(hugetlbpage_init); -static __init int setup_hugepagesz(char *opt) +bool __init arch_hugetlb_valid_size(unsigned long size) { - unsigned long ps = memparse(opt, &opt); - - switch (ps) { -#ifdef CONFIG_ARM64_4K_PAGES - case PUD_SIZE: -#endif - case CONT_PMD_SIZE: - case PMD_SIZE: - case CONT_PTE_SIZE: - add_huge_page_size(ps); - return 1; - } - - hugetlb_bad_size(); - pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); - return 0; + return __hugetlb_valid_size(size); } -__setup("hugepagesz=", setup_hugepagesz); |