Diffstat (limited to 'arch/powerpc/mm/pgtable.c')
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 137
1 file changed, 106 insertions, 31 deletions
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index e3759b69f81b..cb2dcdb18f8e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -23,10 +23,18 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
 
 static inline int is_exec_fault(void)
 {
@@ -73,18 +81,15 @@ static struct page *maybe_pte_to_page(pte_t pte)
 
 static pte_t set_pte_filter_hash(pte_t pte)
 {
-        if (radix_enabled())
-                return pte;
-
         pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
         if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
                                        cpu_has_feature(CPU_FTR_NOEXECUTE))) {
                 struct page *pg = maybe_pte_to_page(pte);
                 if (!pg)
                         return pte;
-                if (!test_bit(PG_arch_1, &pg->flags)) {
+                if (!test_bit(PG_dcache_clean, &pg->flags)) {
                         flush_dcache_icache_page(pg);
-                        set_bit(PG_arch_1, &pg->flags);
+                        set_bit(PG_dcache_clean, &pg->flags);
                 }
         }
         return pte;
@@ -100,10 +105,13 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
  * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
  * instead we "filter out" the exec permission for non clean pages.
  */
-static pte_t set_pte_filter(pte_t pte)
+static inline pte_t set_pte_filter(pte_t pte)
 {
         struct page *pg;
 
+        if (radix_enabled())
+                return pte;
+
         if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
                 return set_pte_filter_hash(pte);
 
@@ -117,13 +125,13 @@ static pte_t set_pte_filter(pte_t pte)
                 return pte;
 
         /* If the page clean, we move on */
-        if (test_bit(PG_arch_1, &pg->flags))
+        if (test_bit(PG_dcache_clean, &pg->flags))
                 return pte;
 
         /* If it's an exec fault, we flush the cache and make it clean */
         if (is_exec_fault()) {
                 flush_dcache_icache_page(pg);
-                set_bit(PG_arch_1, &pg->flags);
+                set_bit(PG_dcache_clean, &pg->flags);
                 return pte;
         }
 
@@ -136,6 +144,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 {
         struct page *pg;
 
+        if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+                return pte;
+
         if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
                 return pte;
 
@@ -162,12 +173,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
                 goto bail;
 
         /* If the page is already clean, we move on */
-        if (test_bit(PG_arch_1, &pg->flags))
+        if (test_bit(PG_dcache_clean, &pg->flags))
                 goto bail;
 
-        /* Clean the page and set PG_arch_1 */
+        /* Clean the page and set PG_dcache_clean */
         flush_dcache_icache_page(pg);
-        set_bit(PG_arch_1, &pg->flags);
+        set_bit(PG_dcache_clean, &pg->flags);
 
 bail:
         return pte_mkexec(pte);
@@ -185,9 +196,6 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
          */
         VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
 
-        /* Add the pte bit when trying to set a pte */
-        pte = pte_mkpte(pte);
-
         /* Note: mm->context.id might not yet have been assigned as
          * this context might not have been activated yet when this
          * is called.
@@ -198,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
         __set_pte_at(mm, addr, ptep, pte, 0);
 }
 
+void unmap_kernel_page(unsigned long va)
+{
+        pmd_t *pmdp = pmd_off_k(va);
+        pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+        pte_clear(&init_mm, va, ptep);
+        flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
 /*
  * This is called when relaxing access to a PTE. It's also called in the page
  * fault path when we don't hit any of the major fault cases, ie, a minor
@@ -249,22 +266,49 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 #else
                 /*
-                 * Not used on non book3s64 platforms. But 8xx
-                 * can possibly use tsize derived from hstate.
+                 * Not used on non book3s64 platforms.
+                 * 8xx compares it with mmu_virtual_psize to
+                 * know if it is a huge page or not.
                  */
-                psize = 0;
+                psize = MMU_PAGE_COUNT;
 #endif
                 __ptep_set_access_flags(vma, ptep, pte, addr, psize);
         }
         return changed;
 #endif
 }
+
+#if defined(CONFIG_PPC_8xx)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+        pmd_t *pmd = pmd_off(mm, addr);
+        pte_basic_t val;
+        pte_basic_t *entry = (pte_basic_t *)ptep;
+        int num, i;
+
+        /*
+         * Make sure hardware valid bit is not set. We don't do
+         * tlb flush for this update.
+         */
+        VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+        pte = set_pte_filter(pte);
+
+        val = pte_val(pte);
+
+        num = number_of_cells_per_pte(pmd, val, 1);
+
+        for (i = 0; i < num; i++, entry++, val += SZ_4K)
+                *entry = val;
+}
+#endif
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #ifdef CONFIG_DEBUG_VM
 void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 {
         pgd_t *pgd;
+        p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
 
@@ -272,12 +316,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
                 return;
         pgd = mm->pgd + pgd_index(addr);
         BUG_ON(pgd_none(*pgd));
-        pud = pud_offset(pgd, addr);
+        p4d = p4d_offset(pgd, addr);
+        BUG_ON(p4d_none(*p4d));
+        pud = pud_offset(p4d, addr);
         BUG_ON(pud_none(*pud));
         pmd = pmd_offset(pud, addr);
         /*
          * khugepaged to collapse normal pages to hugepage, first set
-         * pmd to none to force page fault/gup to take mmap_sem. After
+         * pmd to none to force page fault/gup to take mmap_lock. After
          * pmd is set to none, we do a pte_clear which does this assertion
          * so if we find pmd none, return.
          */
@@ -305,19 +351,20 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys);
  * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
  *
  * So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
+ * we can follow the address down to the page and take a ref on it.
  * This function need to be called with interrupts disabled. We use this variant
  * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
  */
 pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
                         bool *is_thp, unsigned *hpage_shift)
 {
-        pgd_t pgd, *pgdp;
+        pgd_t *pgdp;
+        p4d_t p4d, *p4dp;
         pud_t pud, *pudp;
         pmd_t pmd, *pmdp;
         pte_t *ret_pte;
         hugepd_t *hpdp = NULL;
-        unsigned pdshift = PGDIR_SHIFT;
+        unsigned pdshift;
 
         if (hpage_shift)
                 *hpage_shift = 0;
@@ -325,24 +372,28 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
         if (is_thp)
                 *is_thp = false;
 
-        pgdp = pgdir + pgd_index(ea);
-        pgd = READ_ONCE(*pgdp);
         /*
          * Always operate on the local stack value. This make sure the
          * value don't get updated by a parallel THP split/collapse,
          * page fault or a page unmap. The return pte_t * is still not
         * stable. So should be checked there for above conditions.
+         * Top level is an exception because it is folded into p4d.
          */
-        if (pgd_none(pgd))
+        pgdp = pgdir + pgd_index(ea);
+        p4dp = p4d_offset(pgdp, ea);
+        p4d = READ_ONCE(*p4dp);
+        pdshift = P4D_SHIFT;
+
+        if (p4d_none(p4d))
                 return NULL;
 
-        if (pgd_is_leaf(pgd)) {
-                ret_pte = (pte_t *)pgdp;
+        if (p4d_is_leaf(p4d)) {
+                ret_pte = (pte_t *)p4dp;
                 goto out;
         }
 
-        if (is_hugepd(__hugepd(pgd_val(pgd)))) {
-                hpdp = (hugepd_t *)&pgd;
+        if (is_hugepd(__hugepd(p4d_val(p4d)))) {
+                hpdp = (hugepd_t *)&p4d;
                 goto out_huge;
         }
 
@@ -352,7 +403,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
          * irq disabled
          */
         pdshift = PUD_SHIFT;
-        pudp = pud_offset(&pgd, ea);
+        pudp = pud_offset(&p4d, ea);
         pud = READ_ONCE(*pudp);
 
         if (pud_none(pud))
@@ -421,3 +472,27 @@ out:
         return ret_pte;
 }
 EXPORT_SYMBOL_GPL(__find_linux_pte);
+
+/* Note due to the way vm flags are laid out, the bits are XWR */
+const pgprot_t protection_map[16] = {
+        [VM_NONE] = PAGE_NONE,
+        [VM_READ] = PAGE_READONLY,
+        [VM_WRITE] = PAGE_COPY,
+        [VM_WRITE | VM_READ] = PAGE_COPY,
+        [VM_EXEC] = PAGE_READONLY_X,
+        [VM_EXEC | VM_READ] = PAGE_READONLY_X,
+        [VM_EXEC | VM_WRITE] = PAGE_COPY_X,
+        [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X,
+        [VM_SHARED] = PAGE_NONE,
+        [VM_SHARED | VM_READ] = PAGE_READONLY,
+        [VM_SHARED | VM_WRITE] = PAGE_SHARED,
+        [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
+        [VM_SHARED | VM_EXEC] = PAGE_READONLY_X,
+        [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X,
+        [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X,
+        [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X
+};
+
+#ifndef CONFIG_PPC_BOOK3S_64
+DECLARE_VM_GET_PAGE_PROT
+#endif
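The protection_map[] table added at the end of the diff is indexed by the low VM_READ, VM_WRITE, VM_EXEC and VM_SHARED bits of a VMA's flags, and DECLARE_VM_GET_PAGE_PROT emits the generic vm_get_page_prot() that performs this lookup; it is skipped on Book3S/64, which keeps its own vm_get_page_prot(). The sketch below is a minimal userspace model of that lookup, not kernel code: the VM_* bit values mirror the generic kernel ones, but the table holds plain strings rather than pgprot_t values, and the vm_get_page_prot() here only approximates the shape of what the macro expands to.

/*
 * Illustrative userspace model of the protection_map lookup above.
 * Not part of the patch: entries are strings, not pgprot_t, and
 * vm_get_page_prot() is a simplified stand-in for the generic helper
 * provided by DECLARE_VM_GET_PAGE_PROT.
 */
#include <stdio.h>

#define VM_NONE   0x0UL
#define VM_READ   0x1UL
#define VM_WRITE  0x2UL
#define VM_EXEC   0x4UL
#define VM_SHARED 0x8UL

static const char *protection_map[16] = {
        [VM_NONE] = "PAGE_NONE",
        [VM_READ] = "PAGE_READONLY",
        [VM_WRITE] = "PAGE_COPY",
        [VM_WRITE | VM_READ] = "PAGE_COPY",
        [VM_EXEC] = "PAGE_READONLY_X",
        [VM_EXEC | VM_READ] = "PAGE_READONLY_X",
        [VM_EXEC | VM_WRITE] = "PAGE_COPY_X",
        [VM_EXEC | VM_WRITE | VM_READ] = "PAGE_COPY_X",
        [VM_SHARED] = "PAGE_NONE",
        [VM_SHARED | VM_READ] = "PAGE_READONLY",
        [VM_SHARED | VM_WRITE] = "PAGE_SHARED",
        [VM_SHARED | VM_WRITE | VM_READ] = "PAGE_SHARED",
        [VM_SHARED | VM_EXEC] = "PAGE_READONLY_X",
        [VM_SHARED | VM_EXEC | VM_READ] = "PAGE_READONLY_X",
        [VM_SHARED | VM_EXEC | VM_WRITE] = "PAGE_SHARED_X",
        [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = "PAGE_SHARED_X",
};

/* Simplified stand-in for the helper DECLARE_VM_GET_PAGE_PROT provides. */
static const char *vm_get_page_prot(unsigned long vm_flags)
{
        return protection_map[vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
}

int main(void)
{
        /* A private writable mapping is set up copy-on-write... */
        printf("%s\n", vm_get_page_prot(VM_READ | VM_WRITE));             /* PAGE_COPY */
        /* ...while a shared writable mapping is directly writable. */
        printf("%s\n", vm_get_page_prot(VM_SHARED | VM_READ | VM_WRITE)); /* PAGE_SHARED */
        return 0;
}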