From b7a16c7ad790d0ecb44dcb08a6a75d0d0455ab5f Mon Sep 17 00:00:00 2001
From: Steven Price
Date: Mon, 3 Feb 2020 17:36:03 -0800
Subject: mm: pagewalk: add 'depth' parameter to pte_hole

The pte_hole() callback is called at multiple levels of the page tables.
Code dumping the kernel page tables needs to know at what depth the
missing entry is.  Add this as an extra parameter to pte_hole().  When the
depth isn't known (e.g. when processing a vma) then -1 is passed.

The depth that is reported is the actual level where the entry is missing
(ignoring any folding that is in place), i.e. any levels where
PTRS_PER_P?D is set to 1 are ignored.

Note that depth starts at 0 for a PGD so that PUD/PMD/PTE retain their
natural numbers as levels 2/3/4.

Link: http://lkml.kernel.org/r/20191218162402.45610-16-steven.price@arm.com
Signed-off-by: Steven Price
Tested-by: Zong Li
Cc: Albert Ou
Cc: Alexandre Ghiti
Cc: Andy Lutomirski
Cc: Ard Biesheuvel
Cc: Arnd Bergmann
Cc: Benjamin Herrenschmidt
Cc: Borislav Petkov
Cc: Catalin Marinas
Cc: Christian Borntraeger
Cc: Dave Hansen
Cc: David S. Miller
Cc: Heiko Carstens
Cc: "H. Peter Anvin"
Cc: Ingo Molnar
Cc: James Hogan
Cc: James Morse
Cc: Jerome Glisse
Cc: "Liang, Kan"
Cc: Mark Rutland
Cc: Michael Ellerman
Cc: Paul Burton
Cc: Paul Mackerras
Cc: Paul Walmsley
Cc: Peter Zijlstra
Cc: Ralf Baechle
Cc: Russell King
Cc: Thomas Gleixner
Cc: Vasily Gorbik
Cc: Vineet Gupta
Cc: Will Deacon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/hmm.c      |  8 ++++----
 mm/migrate.c  |  5 +++--
 mm/mincore.c  |  1 +
 mm/pagewalk.c | 31 +++++++++++++++++++++++++------
 4 files changed, 33 insertions(+), 12 deletions(-)

(limited to 'mm')

diff --git a/mm/hmm.c b/mm/hmm.c
index c49e9dfce5b4..72e5a6d9a417 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -186,7 +186,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 }
 
 static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
-			     struct mm_walk *walk)
+			     __always_unused int depth, struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
@@ -380,7 +380,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 again:
 	pmd = READ_ONCE(*pmdp);
 	if (pmd_none(pmd))
-		return hmm_vma_walk_hole(start, end, walk);
+		return hmm_vma_walk_hole(start, end, -1, walk);
 
 	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
 		bool fault, write_fault;
@@ -487,7 +487,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 
 	pud = READ_ONCE(*pudp);
 	if (pud_none(pud)) {
-		ret = hmm_vma_walk_hole(start, end, walk);
+		ret = hmm_vma_walk_hole(start, end, -1, walk);
 		goto out_unlock;
 	}
 
@@ -497,7 +497,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 		bool fault, write_fault;
 
 		if (!pud_present(pud)) {
-			ret = hmm_vma_walk_hole(start, end, walk);
+			ret = hmm_vma_walk_hole(start, end, -1, walk);
 			goto out_unlock;
 		}
 
diff --git a/mm/migrate.c b/mm/migrate.c
index edf42ed90030..b1092876e537 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2151,6 +2151,7 @@ out_unlock:
 #ifdef CONFIG_DEVICE_PRIVATE
 static int migrate_vma_collect_hole(unsigned long start,
 				    unsigned long end,
+				    __always_unused int depth,
 				    struct mm_walk *walk)
 {
 	struct migrate_vma *migrate = walk->private;
@@ -2195,7 +2196,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 
 again:
 	if (pmd_none(*pmdp))
-		return migrate_vma_collect_hole(start, end, walk);
+		return migrate_vma_collect_hole(start, end, -1, walk);
 
 	if (pmd_trans_huge(*pmdp)) {
 		struct page *page;
@@ -2228,7 +2229,7 @@ again:
 				return migrate_vma_collect_skip(start, end,
 								walk);
 			if (pmd_none(*pmdp))
-				return migrate_vma_collect_hole(start, end,
+				return migrate_vma_collect_hole(start, end, -1,
 								walk);
 		}
 	}
diff --git a/mm/mincore.c b/mm/mincore.c
index 49b6fa2f6aa1..0e6dd9948f1a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
 }
 
 static int mincore_unmapped_range(unsigned long addr, unsigned long end,
+				   __always_unused int depth,
 				   struct mm_walk *walk)
 {
 	walk->private += __mincore_unmapped_range(addr, end,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 6732fc7ac4c8..5895ce4f1a85 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,6 +4,22 @@
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
 
+/*
+ * We want to know the real level where an entry is located ignoring any
+ * folding of levels which may be happening. For example if p4d is folded then
+ * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
+ */
+static int real_depth(int depth)
+{
+	if (depth == 3 && PTRS_PER_PMD == 1)
+		depth = 2;
+	if (depth == 2 && PTRS_PER_PUD == 1)
+		depth = 1;
+	if (depth == 1 && PTRS_PER_P4D == 1)
+		depth = 0;
+	return depth;
+}
+
 static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 {
@@ -49,6 +65,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 	unsigned long next;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	int depth = real_depth(3);
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -56,7 +73,7 @@ again:
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -106,6 +123,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 	unsigned long next;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	int depth = real_depth(2);
 
 	pud = pud_offset(p4d, addr);
 	do {
@@ -113,7 +131,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 		next = pud_addr_end(addr, end);
 		if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -154,13 +172,14 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	unsigned long next;
 	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
+	int depth = real_depth(1);
 
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, depth, walk);
 			if (err)
 				break;
 			continue;
@@ -192,7 +211,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (ops->pte_hole)
-				err = ops->pte_hole(addr, next, walk);
+				err = ops->pte_hole(addr, next, 0, walk);
 			if (err)
 				break;
 			continue;
@@ -239,7 +258,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 		if (pte)
 			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
 		else if (ops->pte_hole)
-			err = ops->pte_hole(addr, next, walk);
+			err = ops->pte_hole(addr, next, -1, walk);
 
 		if (err)
 			break;
@@ -283,7 +302,7 @@ static int walk_page_test(unsigned long start, unsigned long end,
 	if (vma->vm_flags & VM_PFNMAP) {
 		int err = 1;
 		if (ops->pte_hole)
-			err = ops->pte_hole(start, end, walk);
+			err = ops->pte_hole(start, end, -1, walk);
 		return err ? err : 1;
 	}
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b
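
For context, a minimal sketch of how a pagewalk client might consume the new
'depth' argument in its pte_hole() callback. Only the pte_hole() signature and
the 0 (pgd) .. 4 (pte) / -1 (unknown) depth convention come from this patch;
the callback name, the ops structure name and the pr_info() reporting below
are illustrative assumptions, not part of the patch.

#include <linux/pagewalk.h>
#include <linux/printk.h>

/*
 * Illustrative only: report each hole together with its page-table level.
 * 'depth' is 0 (pgd) .. 4 (pte), or -1 when the level is unknown (e.g. a
 * hole reported while processing a vma or a hugetlb range).
 */
static const char * const level_name[] = { "pgd", "p4d", "pud", "pmd", "pte" };

static int report_pte_hole(unsigned long addr, unsigned long next,
			   int depth, struct mm_walk *walk)
{
	if (depth < 0)
		pr_info("hole %#lx-%#lx (level unknown)\n", addr, next);
	else
		pr_info("hole %#lx-%#lx at %s level\n",
			addr, next, level_name[depth]);
	return 0;	/* keep walking */
}

static const struct mm_walk_ops report_ops = {
	.pte_hole	= report_pte_hole,
};

Such an ops structure would then be passed to walk_page_range() (or a related
walker) in the usual way; nothing beyond the extra 'depth' argument changes
for callers.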