Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 238 |
1 file changed, 105 insertions, 133 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 33b3461d91e8..5852a86d990d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -19,18 +19,19 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
-#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/setup.h>
 #include <asm/hugetlb.h>
 #include <asm/pte-walk.h>
+#include <asm/firmware.h>
 
 bool hugetlb_disabled = false;
 
 #define hugepd_none(hpd)	(hpd_val(hpd) == 0)
 
-#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
+#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_basic_t)) - \
+			 __builtin_ffs(sizeof(void *)))
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
@@ -53,24 +54,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	if (pshift >= pdshift) {
 		cachep = PGT_CACHE(PTE_T_ORDER);
 		num_hugepd = 1 << (pshift - pdshift);
-		new = NULL;
-	} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
-		cachep = NULL;
-		num_hugepd = 1;
-		new = pte_alloc_one(mm);
 	} else {
 		cachep = PGT_CACHE(pdshift - pshift);
 		num_hugepd = 1;
-		new = NULL;
 	}
 
-	if (!cachep && !new) {
+	if (!cachep) {
 		WARN_ONCE(1, "No page table cache created for hugetlb tables");
 		return -ENOMEM;
 	}
 
-	if (cachep)
-		new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+	new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -101,10 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	if (i < num_hugepd) {
 		for (i = i - 1 ; i >= 0; i--, hpdp--)
 			*hpdp = __hugepd(0);
-		if (cachep)
-			kmem_cache_free(cachep, new);
-		else
-			pte_free(mm, new);
+		kmem_cache_free(cachep, new);
 	} else {
 		kmemleak_ignore(new);
 	}
@@ -116,9 +107,11 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
  * At this point we do the placement change only for BOOK3S 64. This would
  * possibly work on other subarchs.
  */
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+		      unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
+	p4d_t *p4;
 	pud_t *pu;
 	pmd_t *pm;
 	hugepd_t *hpdp = NULL;
@@ -128,20 +121,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 
 	addr &= ~(sz-1);
 	pg = pgd_offset(mm, addr);
+	p4 = p4d_offset(pg, addr);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (pshift == PGDIR_SHIFT)
 		/* 16GB huge page */
-		return (pte_t *) pg;
+		return (pte_t *) p4;
 	else if (pshift > PUD_SHIFT) {
 		/*
 		 * We need to use hugepd table
 		 */
 		ptl = &mm->page_table_lock;
-		hpdp = (hugepd_t *)pg;
+		hpdp = (hugepd_t *)p4;
 	} else {
 		pdshift = PUD_SHIFT;
-		pu = pud_alloc(mm, pg, addr);
+		pu = pud_alloc(mm, p4, addr);
 		if (!pu)
 			return NULL;
 		if (pshift == PUD_SHIFT)
@@ -166,10 +160,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 #else
 	if (pshift >= PGDIR_SHIFT) {
 		ptl = &mm->page_table_lock;
-		hpdp = (hugepd_t *)pg;
+		hpdp = (hugepd_t *)p4;
 	} else {
 		pdshift = PUD_SHIFT;
-		pu = pud_alloc(mm, pg, addr);
+		pu = pud_alloc(mm, p4, addr);
 		if (!pu)
 			return NULL;
 		if (pshift >= PUD_SHIFT) {
@@ -188,6 +182,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
 	if (!hpdp)
 		return NULL;
 
+	if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
+		return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
+
 	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
 
 	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
@@ -222,7 +219,7 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
 	}
 }
 
-int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
+static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 {
 	struct huge_bootmem_page *m;
 	if (nr_gpages == 0)
@@ -233,17 +230,22 @@ int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 	m->hstate = hstate;
 	return 1;
 }
+
+bool __init hugetlb_node_alloc_supported(void)
+{
+	return false;
+}
 #endif
 
-int __init alloc_bootmem_huge_page(struct hstate *h)
+int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
 		return pseries_alloc_bootmem_huge_page(h);
 #endif
-	return __alloc_bootmem_huge_page(h);
+	return __alloc_bootmem_huge_page(h, nid);
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
@@ -253,7 +255,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h)
 struct hugepd_freelist {
 	struct rcu_head rcu;
 	unsigned int index;
-	void *ptes[0];
+	void *ptes[];
 };
 
 static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
@@ -299,6 +301,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
 static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
 #endif
 
+/* Return true when the entry to be freed maps more than the area being freed */
+static bool range_is_outside_limits(unsigned long start, unsigned long end,
+				    unsigned long floor, unsigned long ceiling,
+				    unsigned long mask)
+{
+	if ((start & mask) < floor)
+		return true;
+	if (ceiling) {
+		ceiling &= mask;
+		if (!ceiling)
+			return true;
+	}
+	return end - 1 > ceiling - 1;
+}
+
 static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
 			      unsigned long start, unsigned long end,
 			      unsigned long floor, unsigned long ceiling)
@@ -314,15 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	if (shift > pdshift)
 		num_hugepd = 1 << (shift - pdshift);
 
-	start &= pdmask;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= pdmask;
-		if (! ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
+	if (range_is_outside_limits(start, end, floor, ceiling, pdmask))
 		return;
 
 	for (i = 0; i < num_hugepd; i++, hpdp++)
@@ -330,13 +339,25 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 
 	if (shift >= pdshift)
 		hugepd_free(tlb, hugepte);
-	else if (IS_ENABLED(CONFIG_PPC_8xx))
-		pgtable_free_tlb(tlb, hugepte, 0);
 	else
 		pgtable_free_tlb(tlb, hugepte,
 				 get_hugepd_cache_index(pdshift - shift));
 }
 
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pgtable_t token = pmd_pgtable(*pmd);
+
+	if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK))
+		return;
+
+	pmd_clear(pmd);
+	pte_free_tlb(tlb, token, addr);
+	mm_dec_nr_ptes(tlb->mm);
+}
+
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
 				   unsigned long floor, unsigned long ceiling)
@@ -352,11 +373,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		pmd = pmd_offset(pud, addr);
 		next = pmd_addr_end(addr, end);
 		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
+			if (pmd_none_or_clear_bad(pmd))
+				continue;
+
 			/*
 			 * if it is not hugepd pointer, we should already find
 			 * it cleared.
 			 */
-			WARN_ON(!pmd_none_or_clear_bad(pmd));
+			WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
+
+			hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);
+
 			continue;
 		}
 		/*
@@ -365,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		 * single hugepage, but all of them point to
 		 * the same kmem cache that holds the hugepte.
 		 */
-		more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
+		more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd));
 		if (more > next)
 			next = more;
 
@@ -373,24 +400,16 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				  addr, next, floor, ceiling);
 	} while (addr = next, addr != end);
 
-	start &= PUD_MASK;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= PUD_MASK;
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
+	if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK))
 		return;
 
-	pmd = pmd_offset(pud, start);
+	pmd = pmd_offset(pud, start & PUD_MASK);
 	pud_clear(pud);
-	pmd_free_tlb(tlb, pmd, start);
+	pmd_free_tlb(tlb, pmd, start & PUD_MASK);
 	mm_dec_nr_pmds(tlb->mm);
 }
 
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				   unsigned long addr, unsigned long end,
 				   unsigned long floor, unsigned long ceiling)
 {
@@ -400,7 +419,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 
 	start = addr;
 	do {
-		pud = pud_offset(pgd, addr);
+		pud = pud_offset(p4d, addr);
 		next = pud_addr_end(addr, end);
 		if (!is_hugepd(__hugepd(pud_val(*pud)))) {
 			if (pud_none_or_clear_bad(pud))
@@ -415,7 +434,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		 * single hugepage, but all of them point to
 		 * the same kmem cache that holds the hugepte.
 		 */
-		more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
+		more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud));
 		if (more > next)
 			next = more;
 
@@ -424,20 +443,12 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		}
 	} while (addr = next, addr != end);
 
-	start &= PGDIR_MASK;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= PGDIR_MASK;
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
+	if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK))
 		return;
 
-	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud, start);
+	pud = pud_offset(p4d, start & PGDIR_MASK);
+	p4d_clear(p4d);
+	pud_free_tlb(tlb, pud, start & PGDIR_MASK);
 	mm_dec_nr_puds(tlb->mm);
 }
 
@@ -449,6 +460,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			    unsigned long floor, unsigned long ceiling)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	unsigned long next;
 
 	/*
@@ -471,10 +483,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	do {
 		next = pgd_addr_end(addr, end);
 		pgd = pgd_offset(tlb->mm, addr);
+		p4d = p4d_offset(pgd, addr);
 		if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
-			if (pgd_none_or_clear_bad(pgd))
+			if (p4d_none_or_clear_bad(p4d))
 				continue;
-			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+			hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
 		} else {
 			unsigned long more;
 			/*
@@ -483,11 +496,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			 * for a single hugepage, but all of them point to the
 			 * same kmem cache that holds the hugepte.
 			 */
-			more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+			more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd));
 			if (more > next)
 				next = more;
 
-			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
+			free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT,
 					  addr, next, floor, ceiling);
 		}
 	} while (addr = next, addr != end);
@@ -530,35 +543,7 @@ retry:
 	return page;
 }
 
-#ifdef CONFIG_PPC_MM_SLICES
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-					unsigned long len, unsigned long pgoff,
-					unsigned long flags)
-{
-	struct hstate *hstate = hstate_file(file);
-	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
-
-#ifdef CONFIG_PPC_RADIX_MMU
-	if (radix_enabled())
-		return radix__hugetlb_get_unmapped_area(file, addr, len,
-							pgoff, flags);
-#endif
-	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
-}
-#endif
-
-unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
-	/* With radix we don't use slice, so derive it from vma*/
-	if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
-		unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
-		return 1UL << mmu_psize_to_shift(psize);
-	}
-	return vma_kernel_pagesize(vma);
-}
-
-static int __init add_huge_page_size(unsigned long long size)
+bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	int shift = __ffs(size);
 	int mmu_psize;
@@ -566,37 +551,27 @@
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable and slice limits. */
 	if (size <= PAGE_SIZE || !is_power_of_2(size))
-		return -EINVAL;
+		return false;
 
 	mmu_psize = check_and_get_huge_psize(shift);
 	if (mmu_psize < 0)
-		return -EINVAL;
+		return false;
 
 	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
 
-	/* Return if huge page size has already been setup */
-	if (size_to_hstate(size))
-		return 0;
-
-	hugetlb_add_hstate(shift - PAGE_SHIFT);
-
-	return 0;
+	return true;
 }
 
-static int __init hugepage_setup_sz(char *str)
+static int __init add_huge_page_size(unsigned long long size)
 {
-	unsigned long long size;
-
-	size = memparse(str, &str);
+	int shift = __ffs(size);
 
-	if (add_huge_page_size(size) != 0) {
-		hugetlb_bad_size();
-		pr_err("Invalid huge page size specified(%llu)\n", size);
-	}
+	if (!arch_hugetlb_valid_size((unsigned long)size))
+		return -EINVAL;
 
-	return 1;
+	hugetlb_add_hstate(shift - PAGE_SHIFT);
+	return 0;
 }
-__setup("hugepagesz=", hugepage_setup_sz);
 
 static int __init hugetlbpage_init(void)
 {
@@ -648,7 +623,7 @@ static int __init hugetlbpage_init(void)
 		if (pdshift > shift) {
 			if (!IS_ENABLED(CONFIG_PPC_8xx))
 				pgtable_cache_add(pdshift - shift);
-		} else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
+		} else if (IS_ENABLED(CONFIG_PPC_E500) ||
			   IS_ENABLED(CONFIG_PPC_8xx)) {
 			pgtable_cache_add(PTE_T_ORDER);
 		}
@@ -656,10 +631,7 @@ static int __init hugetlbpage_init(void)
 		configured = true;
 	}
 
-	if (configured) {
-		if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
-			hugetlbpage_init_default();
-	} else
+	if (!configured)
 		pr_info("Failed to initialize. Disabling HugeTLB");
 
 	return 0;
@@ -667,20 +639,20 @@
 arch_initcall(hugetlbpage_init);
 
-void flush_dcache_icache_hugepage(struct page *page)
+void __init gigantic_hugetlb_cma_reserve(void)
 {
-	int i;
-	void *start;
+	unsigned long order = 0;
 
-	BUG_ON(!PageCompound(page));
+	if (radix_enabled())
+		order = PUD_SHIFT - PAGE_SHIFT;
+	else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
+		/*
+		 * For pseries we do use ibm,expected#pages for reserving 16G pages.
+		 */
+		order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
 
-	for (i = 0; i < compound_nr(page); i++) {
-		if (!PageHighMem(page)) {
-			__flush_dcache_icache(page_address(page+i));
-		} else {
-			start = kmap_atomic(page+i);
-			__flush_dcache_icache(start);
-			kunmap_atomic(start);
-		}
+	if (order) {
+		VM_WARN_ON(order < MAX_ORDER);
+		hugetlb_cma_reserve(order);
 	}
 }
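Reviewer note: the repeated nine-line floor/ceiling checks that this diff deletes all collapse into the new range_is_outside_limits() helper. Below is a minimal standalone sketch of that check, compiled outside the kernel tree; the helper body mirrors the code added above, but the 2 MB mask and the sample addresses in main() are hypothetical values chosen only to exercise both outcomes.

/*
 * Standalone sketch of the bounds check factored out as
 * range_is_outside_limits() in the diff above.
 */
#include <stdbool.h>
#include <stdio.h>

/* Return true when the entry to be freed maps more than the area being freed */
static bool range_is_outside_limits(unsigned long start, unsigned long end,
                                    unsigned long floor, unsigned long ceiling,
                                    unsigned long mask)
{
        if ((start & mask) < floor)
                return true;
        if (ceiling) {
                ceiling &= mask;
                if (!ceiling)
                        return true;
        }
        return end - 1 > ceiling - 1;
}

int main(void)
{
        /* Pretend each entry maps a 2 MB-aligned region (mask clears the low 21 bits). */
        unsigned long mask = ~((1UL << 21) - 1);

        /* Freed range sits inside [floor, ceiling): not outside, so the caller may free the entry. */
        printf("%d\n", range_is_outside_limits(0x400000, 0x600000,
                                               0x400000, 0x800000, mask));

        /* Entry's aligned start falls below the floor: outside, so the caller must keep it. */
        printf("%d\n", range_is_outside_limits(0x400000, 0x600000,
                                               0x500000, 0x800000, mask));
        return 0;
}

Note the unsigned "end - 1 > ceiling - 1" comparison: with ceiling == 0 (no upper limit) the subtraction wraps to the maximum unsigned value, so that test never triggers, which preserves the behaviour of the open-coded checks being replaced.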