Diffstat (limited to 'arch/powerpc/mm/pgtable.c')
-rw-r--r--  arch/powerpc/mm/pgtable.c | 137
1 file changed, 106 insertions(+), 31 deletions(-)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index e3759b69f81b..cb2dcdb18f8e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -23,10 +23,18 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
static inline int is_exec_fault(void)
{
@@ -73,18 +81,15 @@ static struct page *maybe_pte_to_page(pte_t pte)
static pte_t set_pte_filter_hash(pte_t pte)
{
- if (radix_enabled())
- return pte;
-
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
cpu_has_feature(CPU_FTR_NOEXECUTE))) {
struct page *pg = maybe_pte_to_page(pte);
if (!pg)
return pte;
- if (!test_bit(PG_arch_1, &pg->flags)) {
+ if (!test_bit(PG_dcache_clean, &pg->flags)) {
flush_dcache_icache_page(pg);
- set_bit(PG_arch_1, &pg->flags);
+ set_bit(PG_dcache_clean, &pg->flags);
}
}
return pte;
@@ -100,10 +105,13 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
* instead we "filter out" the exec permission for non clean pages.
*/
-static pte_t set_pte_filter(pte_t pte)
+static inline pte_t set_pte_filter(pte_t pte)
{
struct page *pg;
+ if (radix_enabled())
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return set_pte_filter_hash(pte);
@@ -117,13 +125,13 @@ static pte_t set_pte_filter(pte_t pte)
return pte;
/* If the page clean, we move on */
- if (test_bit(PG_arch_1, &pg->flags))
+ if (test_bit(PG_dcache_clean, &pg->flags))
return pte;
/* If it's an exec fault, we flush the cache and make it clean */
if (is_exec_fault()) {
flush_dcache_icache_page(pg);
- set_bit(PG_arch_1, &pg->flags);
+ set_bit(PG_dcache_clean, &pg->flags);
return pte;
}
@@ -136,6 +144,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
{
struct page *pg;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return pte;
+
if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
return pte;
@@ -162,12 +173,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
goto bail;
/* If the page is already clean, we move on */
- if (test_bit(PG_arch_1, &pg->flags))
+ if (test_bit(PG_dcache_clean, &pg->flags))
goto bail;
- /* Clean the page and set PG_arch_1 */
+ /* Clean the page and set PG_dcache_clean */
flush_dcache_icache_page(pg);
- set_bit(PG_arch_1, &pg->flags);
+ set_bit(PG_dcache_clean, &pg->flags);
bail:
return pte_mkexec(pte);
@@ -185,9 +196,6 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
- /* Add the pte bit when trying to set a pte */
- pte = pte_mkpte(pte);
-
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
* is called.
@@ -198,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
__set_pte_at(mm, addr, ptep, pte, 0);
}
+void unmap_kernel_page(unsigned long va)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+ pte_clear(&init_mm, va, ptep);
+ flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
/*
* This is called when relaxing access to a PTE. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
@@ -249,22 +266,49 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
#else
/*
- * Not used on non book3s64 platforms. But 8xx
- * can possibly use tsize derived from hstate.
+ * Not used on non book3s64 platforms.
+ * 8xx compares it with mmu_virtual_psize to
+ * know if it is a huge page or not.
*/
- psize = 0;
+ psize = MMU_PAGE_COUNT;
#endif
__ptep_set_access_flags(vma, ptep, pte, addr, psize);
}
return changed;
#endif
}
+
+#if defined(CONFIG_PPC_8xx)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+ pmd_t *pmd = pmd_off(mm, addr);
+ pte_basic_t val;
+ pte_basic_t *entry = (pte_basic_t *)ptep;
+ int num, i;
+
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+ pte = set_pte_filter(pte);
+
+ val = pte_val(pte);
+
+ num = number_of_cells_per_pte(pmd, val, 1);
+
+ for (i = 0; i < num; i++, entry++, val += SZ_4K)
+ *entry = val;
+}
+#endif
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef CONFIG_DEBUG_VM
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -272,12 +316,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
return;
pgd = mm->pgd + pgd_index(addr);
BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ BUG_ON(p4d_none(*p4d));
+ pud = pud_offset(p4d, addr);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, addr);
/*
* khugepaged to collapse normal pages to hugepage, first set
- * pmd to none to force page fault/gup to take mmap_sem. After
+ * pmd to none to force page fault/gup to take mmap_lock. After
* pmd is set to none, we do a pte_clear which does this assertion
* so if we find pmd none, return.
*/
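[Editor's note] The hunk above threads the new p4d level into assert_pte_locked()'s hand-rolled walk. For readers less familiar with the five-level layout, here is a minimal sketch of the same descent written with the generic helpers. walk_to_pte() is a hypothetical name used only for illustration; pgd_offset_k(), p4d_offset(), pud_offset(), pmd_offset() and pte_offset_kernel() are the standard accessors, and on configurations where a level is folded the corresponding *_offset() call simply passes the entry through.

#include <linux/mm.h>

/*
 * Illustrative sketch only: descend init_mm's page tables to the PTE
 * that maps a kernel virtual address, bailing out at the first empty
 * level. Leaf (huge) entries are not handled here.
 */
static pte_t *walk_to_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);	/* top level of init_mm */
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);		/* folded on 4-level configs */
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;
	return pte_offset_kernel(pmd, addr);	/* no locking; kernel mapping */
}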
@@ -305,19 +351,20 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys);
* (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
*
* So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
+ * we can follow the address down to the page and take a ref on it.
* This function need to be called with interrupts disabled. We use this variant
* when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
*/
pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hpage_shift)
{
- pgd_t pgd, *pgdp;
+ pgd_t *pgdp;
+ p4d_t p4d, *p4dp;
pud_t pud, *pudp;
pmd_t pmd, *pmdp;
pte_t *ret_pte;
hugepd_t *hpdp = NULL;
- unsigned pdshift = PGDIR_SHIFT;
+ unsigned pdshift;
if (hpage_shift)
*hpage_shift = 0;
@@ -325,24 +372,28 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
if (is_thp)
*is_thp = false;
- pgdp = pgdir + pgd_index(ea);
- pgd = READ_ONCE(*pgdp);
/*
* Always operate on the local stack value. This make sure the
* value don't get updated by a parallel THP split/collapse,
* page fault or a page unmap. The return pte_t * is still not
* stable. So should be checked there for above conditions.
+ * Top level is an exception because it is folded into p4d.
*/
- if (pgd_none(pgd))
+ pgdp = pgdir + pgd_index(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ p4d = READ_ONCE(*p4dp);
+ pdshift = P4D_SHIFT;
+
+ if (p4d_none(p4d))
return NULL;
- if (pgd_is_leaf(pgd)) {
- ret_pte = (pte_t *)pgdp;
+ if (p4d_is_leaf(p4d)) {
+ ret_pte = (pte_t *)p4dp;
goto out;
}
- if (is_hugepd(__hugepd(pgd_val(pgd)))) {
- hpdp = (hugepd_t *)&pgd;
+ if (is_hugepd(__hugepd(p4d_val(p4d)))) {
+ hpdp = (hugepd_t *)&p4d;
goto out_huge;
}
@@ -352,7 +403,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
* irq disabled
*/
pdshift = PUD_SHIFT;
- pudp = pud_offset(&pgd, ea);
+ pudp = pud_offset(&p4d, ea);
pud = READ_ONCE(*pudp);
if (pud_none(pud))
@@ -421,3 +472,27 @@ out:
return ret_pte;
}
EXPORT_SYMBOL_GPL(__find_linux_pte);
+
+/* Note due to the way vm flags are laid out, the bits are XWR */
+const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_NONE,
+ [VM_READ] = PAGE_READONLY,
+ [VM_WRITE] = PAGE_COPY,
+ [VM_WRITE | VM_READ] = PAGE_COPY,
+ [VM_EXEC] = PAGE_READONLY_X,
+ [VM_EXEC | VM_READ] = PAGE_READONLY_X,
+ [VM_EXEC | VM_WRITE] = PAGE_COPY_X,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X,
+ [VM_SHARED] = PAGE_NONE,
+ [VM_SHARED | VM_READ] = PAGE_READONLY,
+ [VM_SHARED | VM_WRITE] = PAGE_SHARED,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
+ [VM_SHARED | VM_EXEC] = PAGE_READONLY_X,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X
+};
+
+#ifndef CONFIG_PPC_BOOK3S_64
+DECLARE_VM_GET_PAGE_PROT
+#endif
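
[Editor's note] The protection_map[] table added at the end feeds the core kernel's vm_get_page_prot(). On everything except Book3S 64 (which keeps its own definition, e.g. to fold protection-key bits into the result), the DECLARE_VM_GET_PAGE_PROT line pulls in the generic definition from the core mm headers. Roughly, and treating this as a hedged approximation rather than the verbatim macro expansion, it behaves like:

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	/* Mask down to the access bits and index the table above. */
	return protection_map[vm_flags &
			      (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
}
EXPORT_SYMBOL(vm_get_page_prot);

So, for example, a shared read/write mapping (VM_SHARED | VM_WRITE | VM_READ) resolves to PAGE_SHARED, while a private writable one lands on PAGE_COPY and relies on copy-on-write to become writable.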