diff options
Diffstat (limited to '')
-rw-r--r-- | mm/pagewalk.c | 101 |
1 files changed, 80 insertions, 21 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 928df1638c30..2ff3a5bebceb 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -58,6 +58,45 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return err; } +#ifdef CONFIG_ARCH_HAS_HUGEPD +static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr, + unsigned long end, struct mm_walk *walk, int pdshift) +{ + int err = 0; + const struct mm_walk_ops *ops = walk->ops; + int shift = hugepd_shift(*phpd); + int page_size = 1 << shift; + + if (!ops->pte_entry) + return 0; + + if (addr & (page_size - 1)) + return 0; + + for (;;) { + pte_t *pte; + + spin_lock(&walk->mm->page_table_lock); + pte = hugepte_offset(*phpd, addr, pdshift); + err = ops->pte_entry(pte, addr, addr + page_size, walk); + spin_unlock(&walk->mm->page_table_lock); + + if (err) + break; + if (addr >= end - page_size) + break; + addr += page_size; + } + return err; +} +#else +static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr, + unsigned long end, struct mm_walk *walk, int pdshift) +{ + return 0; +} +#endif + static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -71,7 +110,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, do { again: next = pmd_addr_end(addr, end); - if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) { + if (pmd_none(*pmd)) { if (ops->pte_hole) err = ops->pte_hole(addr, next, depth, walk); if (err) @@ -108,7 +147,10 @@ again: goto again; } - err = walk_pte_range(pmd, addr, next, walk); + if (is_hugepd(__hugepd(pmd_val(*pmd)))) + err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT); + else + err = walk_pte_range(pmd, addr, next, walk); if (err) break; } while (pmd++, addr = next, addr != end); @@ -129,7 +171,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, do { again: next = pud_addr_end(addr, end); - if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) { + if (pud_none(*pud)) { if (ops->pte_hole) err = ops->pte_hole(addr, next, depth, walk); if (err) @@ -157,7 +199,10 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, if (pud_none(*pud)) goto again; - err = walk_pmd_range(pud, addr, next, walk); + if (is_hugepd(__hugepd(pud_val(*pud)))) + err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT); + else + err = walk_pmd_range(pud, addr, next, walk); if (err) break; } while (pud++, addr = next, addr != end); @@ -189,7 +234,9 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, if (err) break; } - if (ops->pud_entry || ops->pmd_entry || ops->pte_entry) + if (is_hugepd(__hugepd(p4d_val(*p4d)))) + err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT); + else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry) err = walk_pud_range(p4d, addr, next, walk); if (err) break; @@ -224,8 +271,9 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, if (err) break; } - if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || - ops->pte_entry) + if (is_hugepd(__hugepd(pgd_val(*pgd)))) + err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT); + else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry) err = walk_p4d_range(pgd, addr, next, walk); if (err) break; @@ -318,19 +366,19 @@ static int __walk_page_range(unsigned long start, unsigned long end, struct vm_area_struct *vma = walk->vma; const struct mm_walk_ops *ops = walk->ops; - if (vma && ops->pre_vma) { + if (ops->pre_vma) { err = ops->pre_vma(start, end, walk); if (err) return err; } - if (vma && is_vm_hugetlb_page(vma)) { + if (is_vm_hugetlb_page(vma)) { if (ops->hugetlb_entry) err = walk_hugetlb_range(start, end, walk); } else err = walk_pgd_range(start, end, walk); - if (vma && ops->post_vma) + if (ops->post_vma) ops->post_vma(walk); return err; @@ -373,7 +421,7 @@ static int __walk_page_range(unsigned long start, unsigned long end, * caller-specific data to callbacks, @private should be helpful. * * Locking: - * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem, + * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock, * because these function traverse vma list and/or access to vma's data. */ int walk_page_range(struct mm_struct *mm, unsigned long start, @@ -395,20 +443,24 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, if (!walk.mm) return -EINVAL; - lockdep_assert_held(&walk.mm->mmap_sem); + mmap_assert_locked(walk.mm); vma = find_vma(walk.mm, start); do { if (!vma) { /* after the last vma */ walk.vma = NULL; next = end; + if (ops->pte_hole) + err = ops->pte_hole(start, next, -1, &walk); } else if (start < vma->vm_start) { /* outside vma */ walk.vma = NULL; next = min(end, vma->vm_start); + if (ops->pte_hole) + err = ops->pte_hole(start, next, -1, &walk); } else { /* inside vma */ walk.vma = vma; next = min(end, vma->vm_end); - vma = vma->vm_next; + vma = find_vma(mm, vma->vm_end); err = walk_page_test(start, next, &walk); if (err > 0) { @@ -422,16 +474,23 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, } if (err < 0) break; - } - if (walk.vma || walk.ops->pte_hole) err = __walk_page_range(start, next, &walk); + } if (err) break; } while (start = next, start < end); return err; } -/* +/** + * walk_page_range_novma - walk a range of pagetables not backed by a vma + * @mm: mm_struct representing the target process of page table walk + * @start: start address of the virtual address range + * @end: end address of the virtual address range + * @ops: operation to call during the walk + * @pgd: pgd to walk if different from mm->pgd + * @private: private data for callbacks' usage + * * Similar to walk_page_range() but can walk any page tables even if they are * not backed by VMAs. Because 'unusual' entries may be walked this function * will also not lock the PTEs for the pte_entry() callback. This is useful for @@ -453,9 +512,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start, if (start >= end || !walk.mm) return -EINVAL; - lockdep_assert_held(&walk.mm->mmap_sem); + mmap_assert_write_locked(walk.mm); - return __walk_page_range(start, end, &walk); + return walk_pgd_range(start, end, &walk); } int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, @@ -472,7 +531,7 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, if (!walk.mm) return -EINVAL; - lockdep_assert_held(&walk.mm->mmap_sem); + mmap_assert_locked(walk.mm); err = walk_page_test(vma->vm_start, vma->vm_end, &walk); if (err > 0) @@ -498,11 +557,11 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, * Also see walk_page_range() for additional information. * * Locking: - * This function can't require that the struct mm_struct::mmap_sem is held, + * This function can't require that the struct mm_struct::mmap_lock is held, * since @mapping may be mapped by multiple processes. Instead * @mapping->i_mmap_rwsem must be held. This might have implications in the * callbacks, and it's up tho the caller to ensure that the - * struct mm_struct::mmap_sem is not needed. + * struct mm_struct::mmap_lock is not needed. * * Also this means that a caller can't rely on the struct * vm_area_struct::vm_flags to be constant across a call, |