From fd1a745ce03e37945674c14833870a9af0882e2d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:42 +0000 Subject: mm: support page_mapcount() on page_has_type() pages Return 0 for pages which can't be mapped. This matches how page_mapped() works. It is more convenient for users to not have to filter out these pages. Link: https://lkml.kernel.org/r/20240321142448.1645400-5-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- fs/proc/page.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/proc/page.c') diff --git a/fs/proc/page.c b/fs/proc/page.c index 195b077c0fac..9223856c934b 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, */ ppage = pfn_to_online_page(pfn); - if (!ppage || PageSlab(ppage) || page_has_type(ppage)) + if (!ppage) pcount = 0; else pcount = page_mapcount(ppage); @@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page) /* * pseudo flags for the well known (anonymous) memory mapped pages - * - * Note that page->_mapcount is overloaded in SLAB, so the - * simple test in page_mapped() is not enough. */ - if (!PageSlab(page) && page_mapped(page)) + if (page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; -- cgit v1.2.3-59-g8ed1b From dee3d0bef2b00772be430425832ead6aa9d707f9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 17:10:32 +0000 Subject: proc: rewrite stable_page_flags() Reduce the usage of PageFlag tests and reduce the number of compound_head() calls. For multi-page folios, we'll now show all pages as having the flags that apply to them, e.g. if it's dirty, all pages will have the dirty flag set instead of just the head page. The mapped flag is still per page, as is the hwpoison flag. [willy@infradead.org: fix up some bits vs masks] Link: https://lkml.kernel.org/r/20240403173112.1450721-1-willy@infradead.org [willy@infradead.org: fix warnings] Link: https://lkml.kernel.org/r/ZhBPtCYfSuFuUMEz@casper.infradead.org Link: https://lkml.kernel.org/r/20240326171045.410737-11-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Svetly Todorov Signed-off-by: Andrew Morton --- fs/proc/page.c | 69 ++++++++++++++++++++++-------------------- include/linux/huge_mm.h | 4 +-- include/linux/page-flags.h | 2 +- tools/cgroup/memcg_slabinfo.py | 5 ++- 4 files changed, 42 insertions(+), 38 deletions(-) (limited to 'fs/proc/page.c') diff --git a/fs/proc/page.c b/fs/proc/page.c index 9223856c934b..05120263af2a 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -107,10 +107,13 @@ static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) return ((kflags >> kbit) & 1) << ubit; } -u64 stable_page_flags(struct page *page) +u64 stable_page_flags(const struct page *page) { - u64 k; - u64 u; + const struct folio *folio; + unsigned long k; + unsigned long mapping; + bool is_anon; + u64 u = 0; /* * pseudo flag: KPF_NOPAGE @@ -118,49 +121,47 @@ u64 stable_page_flags(struct page *page) */ if (!page) return 1 << KPF_NOPAGE; + folio = page_folio(page); - k = page->flags; - u = 0; + k = folio->flags; + mapping = (unsigned long)folio->mapping; + is_anon = mapping & PAGE_MAPPING_ANON; /* * pseudo flags for the well known (anonymous) memory mapped pages */ if (page_mapped(page)) u |= 1 << KPF_MMAP; - if (PageAnon(page)) + if (is_anon) { u |= 1 << KPF_ANON; - if (PageKsm(page)) - u |= 1 << KPF_KSM; + if (mapping & PAGE_MAPPING_KSM) + u |= 1 << KPF_KSM; + } /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ - if (PageHead(page)) - u |= 1 << KPF_COMPOUND_HEAD; - if (PageTail(page)) + if (page == &folio->page) + u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head); + else u |= 1 << KPF_COMPOUND_TAIL; - if (PageHuge(page)) + if (folio_test_hugetlb(folio)) u |= 1 << KPF_HUGE; /* - * PageTransCompound can be true for non-huge compound pages (slab - * pages or pages allocated by drivers with __GFP_COMP) because it - * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon + * We need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - if (PageLRU(head) || PageAnon(head)) + else if (folio_test_large(folio)) { + if ((k & (1 << PG_lru)) || is_anon) u |= 1 << KPF_THP; - else if (is_huge_zero_page(head)) { + else if (is_huge_zero_page(&folio->page)) { u |= 1 << KPF_ZERO_PAGE; u |= 1 << KPF_THP; } } else if (is_zero_pfn(page_to_pfn(page))) u |= 1 << KPF_ZERO_PAGE; - /* * Caveats on high order pages: PG_buddy and PG_slab will only be set * on the head page. @@ -174,16 +175,17 @@ u64 stable_page_flags(struct page *page) u |= 1 << KPF_OFFLINE; if (PageTable(page)) u |= 1 << KPF_PGTABLE; + if (folio_test_slab(folio)) + u |= 1 << KPF_SLAB; - if (page_is_idle(page)) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) + u |= kpf_copy_bit(k, KPF_IDLE, PG_idle); +#else + if (folio_test_idle(folio)) u |= 1 << KPF_IDLE; +#endif u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); - - u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); - if (PageTail(page) && PageSlab(page)) - u |= 1 << KPF_SLAB; - u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); @@ -194,7 +196,8 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); - if (PageSwapCache(page)) +#define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache)) + if ((k & SWAPCACHE) == SWAPCACHE) u |= 1 << KPF_SWAPCACHE; u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); @@ -202,7 +205,10 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked); #ifdef CONFIG_MEMORY_FAILURE - u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); + if (u & (1 << KPF_HUGE)) + u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); + else + u |= kpf_copy_bit(page->flags, KPF_HWPOISON, PG_hwpoison); #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED @@ -228,7 +234,6 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, { const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; - struct page *ppage; unsigned long src = *ppos; unsigned long pfn; ssize_t ret = 0; @@ -245,9 +250,9 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, * TODO: ZONE_DEVICE support requires to identify * memmaps that were actually initialized. */ - ppage = pfn_to_online_page(pfn); + struct page *page = pfn_to_online_page(pfn); - if (put_user(stable_page_flags(ppage), out)) { + if (put_user(stable_page_flags(page), out)) { ret = -EFAULT; break; } diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7576025db55d..1540a1481daf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -351,7 +351,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); extern struct page *huge_zero_page; extern unsigned long huge_zero_pfn; -static inline bool is_huge_zero_page(struct page *page) +static inline bool is_huge_zero_page(const struct page *page) { return READ_ONCE(huge_zero_page) == page; } @@ -480,7 +480,7 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } -static inline bool is_huge_zero_page(struct page *page) +static inline bool is_huge_zero_page(const struct page *page) { return false; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index eaecf544039f..888353c209c0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -734,7 +734,7 @@ static __always_inline bool PageKsm(const struct page *page) TESTPAGEFLAG_FALSE(Ksm, ksm) #endif -u64 stable_page_flags(struct page *page); +u64 stable_page_flags(const struct page *page); /** * folio_xor_flags_has_waiters - Change some folio flags. diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 1d3a90d93fe2..270c28a0d098 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -146,12 +146,11 @@ def detect_kernel_config(): def for_each_slab(prog): - PGSlab = 1 << prog.constant('PG_slab') - PGHead = 1 << prog.constant('PG_head') + PGSlab = ~prog.constant('PG_slab') for page in for_each_page(prog): try: - if page.flags.value_() & PGSlab: + if page.page_type.value_() == PGSlab: yield cast('struct slab *', page) except FaultError: pass -- cgit v1.2.3-59-g8ed1b From 5beaee54a324ba1fe307e341ec825d5d099f4091 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 26 Mar 2024 20:28:22 +0000 Subject: mm: add is_huge_zero_folio() This is the folio equivalent of is_huge_zero_page(). It doesn't add any efficiency, but it does prevent the caller from passing a tail page and getting confused when the predicate returns false. Link: https://lkml.kernel.org/r/20240326202833.523759-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- fs/proc/page.c | 2 +- include/linux/huge_mm.h | 10 ++++++++++ mm/huge_memory.c | 6 +++--- mm/mempolicy.c | 2 +- mm/swap.c | 2 +- mm/swap_state.c | 2 +- mm/userfaultfd.c | 2 +- 7 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs/proc/page.c') diff --git a/fs/proc/page.c b/fs/proc/page.c index 05120263af2a..2fb64bdb64eb 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -155,7 +155,7 @@ u64 stable_page_flags(const struct page *page) else if (folio_test_large(folio)) { if ((k & (1 << PG_lru)) || is_anon) u |= 1 << KPF_THP; - else if (is_huge_zero_page(&folio->page)) { + else if (is_huge_zero_folio(folio)) { u |= 1 << KPF_ZERO_PAGE; u |= 1 << KPF_THP; } diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1540a1481daf..600c6008262b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -356,6 +356,11 @@ static inline bool is_huge_zero_page(const struct page *page) return READ_ONCE(huge_zero_page) == page; } +static inline bool is_huge_zero_folio(const struct folio *folio) +{ + return READ_ONCE(huge_zero_page) == &folio->page; +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); @@ -485,6 +490,11 @@ static inline bool is_huge_zero_page(const struct page *page) return false; } +static inline bool is_huge_zero_folio(const struct folio *folio) +{ + return false; +} + static inline bool is_huge_zero_pmd(pmd_t pmd) { return false; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 75ad971ca45e..5c043c7b5062 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -789,12 +789,12 @@ struct deferred_split *get_deferred_split_queue(struct folio *folio) } #endif -static inline bool is_transparent_hugepage(struct folio *folio) +static inline bool is_transparent_hugepage(const struct folio *folio) { if (!folio_test_large(folio)) return false; - return is_huge_zero_page(&folio->page) || + return is_huge_zero_folio(folio) || folio_test_large_rmappable(folio); } @@ -3085,7 +3085,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, } - is_hzp = is_huge_zero_page(&folio->page); + is_hzp = is_huge_zero_folio(folio); if (is_hzp) { pr_warn_ratelimited("Called split_huge_page for huge zero page\n"); return -EBUSY; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 913cff5da5a3..5743028a63a5 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -510,7 +510,7 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk) return; } folio = pfn_folio(pmd_pfn(*pmd)); - if (is_huge_zero_page(&folio->page)) { + if (is_huge_zero_folio(folio)) { walk->action = ACTION_CONTINUE; return; } diff --git a/mm/swap.c b/mm/swap.c index 500a09a48dfd..f72364e92d5f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -985,7 +985,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs) struct folio *folio = folios->folios[i]; unsigned int nr_refs = refs ? refs[i] : 1; - if (is_huge_zero_page(&folio->page)) + if (is_huge_zero_folio(folio)) continue; if (folio_is_zone_device(folio)) { diff --git a/mm/swap_state.c b/mm/swap_state.c index bfc7e8c58a6d..2deac23633cd 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -301,7 +301,7 @@ void free_page_and_swap_cache(struct page *page) struct folio *folio = page_folio(page); free_swap_cache(folio); - if (!is_huge_zero_page(page)) + if (!is_huge_zero_folio(folio)) folio_put(folio); } diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 3c3539c573e7..a0ec14553fbe 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1664,7 +1664,7 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, !pmd_none(dst_pmdval)) { struct folio *folio = pfn_folio(pmd_pfn(*src_pmd)); - if (!folio || (!is_huge_zero_page(&folio->page) && + if (!folio || (!is_huge_zero_folio(folio) && !PageAnonExclusive(&folio->page))) { spin_unlock(ptl); err = -EBUSY; -- cgit v1.2.3-59-g8ed1b