diff options
Diffstat (limited to 'include/linux/page-flags.h')
-rw-r--r-- | include/linux/page-flags.h | 627 |
1 files changed, 432 insertions, 195 deletions
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 77de28bfefb0..0b0ae5084e60 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -63,8 +63,10 @@ * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * - * PG_uptodate tells whether the page's contents is valid. When a read - * completes, the page becomes uptodate, unless a disk I/O error happened. + * PG_swapbacked is set when a page uses swap as a backing storage. This are + * usually PageAnon or shmem pages but please note that even anonymous pages + * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as + * a result of MADV_FREE). * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). @@ -81,8 +83,7 @@ */ /* - * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break - * locked- and dirty-page accounting. + * Don't use the pageflags directly. Use the PageFoo macros. * * The page flags field is split into two parts, the main flags area * which extends from the low bits upwards, and the fields area which @@ -127,12 +128,29 @@ enum pageflags { #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif +#ifdef CONFIG_64BIT + PG_arch_2, +#endif +#ifdef CONFIG_KASAN_HW_TAGS + PG_skip_kasan_poison, +#endif __NR_PAGEFLAGS, + PG_readahead = PG_reclaim, + + /* + * Depending on the way an anonymous folio can be mapped into a page + * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped + * THP), PG_anon_exclusive may be set only for the head page or for + * tail pages of an anonymous folio. For now, we only expect it to be + * set on tail pages for PTE-mapped THP. + */ + PG_anon_exclusive = PG_mappedtodisk, + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -159,39 +177,131 @@ enum pageflags { PG_slob_free = PG_private, /* Compound pages. Stored in first tail page's flags */ - PG_double_map = PG_private_2, + PG_double_map = PG_workingset, + +#ifdef CONFIG_MEMORY_FAILURE + /* + * Compound pages. Stored in first tail page's flags. + * Indicates that at least one subpage is hwpoisoned in the + * THP. + */ + PG_has_hwpoisoned = PG_error, +#endif /* non-lru isolated movable page */ PG_isolated = PG_reclaim, + + /* Only valid for buddy pages. Used to track pages that are reported */ + PG_reported = PG_uptodate, + +#ifdef CONFIG_MEMORY_HOTPLUG + /* For self-hosted memmap pages */ + PG_vmemmap_self_hosted = PG_owner_priv_1, +#endif }; +#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) + #ifndef __GENERATING_BOUNDS_H -struct page; /* forward declaration */ +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP +DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key); + +/* + * Return the real head page struct iff the @page is a fake head page, otherwise + * return the @page itself. See Documentation/mm/vmemmap_dedup.rst. + */ +static __always_inline const struct page *page_fixed_fake_head(const struct page *page) +{ + if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) + return page; + + /* + * Only addresses aligned with PAGE_SIZE of struct page may be fake head + * struct page. The alignment check aims to avoid access the fields ( + * e.g. compound_head) of the @page[1]. It can avoid touch a (possibly) + * cold cacheline in some cases. + */ + if (IS_ALIGNED((unsigned long)page, PAGE_SIZE) && + test_bit(PG_head, &page->flags)) { + /* + * We can safely access the field of the @page[1] with PG_head + * because the @page is a compound page composed with at least + * two contiguous pages. + */ + unsigned long head = READ_ONCE(page[1].compound_head); + + if (likely(head & 1)) + return (const struct page *)(head - 1); + } + return page; +} +#else +static inline const struct page *page_fixed_fake_head(const struct page *page) +{ + return page; +} +#endif + +static __always_inline int page_is_fake_head(struct page *page) +{ + return page_fixed_fake_head(page) != page; +} -static inline struct page *compound_head(struct page *page) +static inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); if (unlikely(head & 1)) - return (struct page *) (head - 1); - return page; + return head - 1; + return (unsigned long)page_fixed_fake_head(page); } +#define compound_head(page) ((typeof(page))_compound_head(page)) + +/** + * page_folio - Converts from page to folio. + * @p: The page. + * + * Every page is part of a folio. This function cannot be called on a + * NULL pointer. + * + * Context: No reference, nor lock is required on @page. If the caller + * does not hold a reference, this call may race with a folio split, so + * it should re-check the folio still contains this page after gaining + * a reference on the folio. + * Return: The folio which contains this page. + */ +#define page_folio(p) (_Generic((p), \ + const struct page *: (const struct folio *)_compound_head(p), \ + struct page *: (struct folio *)_compound_head(p))) + +/** + * folio_page - Return a page from a folio. + * @folio: The folio. + * @n: The page number to return. + * + * @n is relative to the start of the folio. This function does not + * check that the page number lies within @folio; the caller is presumed + * to have a reference to the page. + */ +#define folio_page(folio, n) nth_page(&(folio)->page, n) + static __always_inline int PageTail(struct page *page) { - return READ_ONCE(page->compound_head) & 1; + return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page); } static __always_inline int PageCompound(struct page *page) { - return test_bit(PG_head, &page->flags) || PageTail(page); + return test_bit(PG_head, &page->flags) || + READ_ONCE(page->compound_head) & 1; } #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { - return page->flags == PAGE_POISON_PATTERN; + return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM @@ -202,6 +312,15 @@ static inline void page_init_poison(struct page *page, size_t size) } #endif +static unsigned long *folio_flags(struct folio *folio, unsigned n) +{ + struct page *page = &folio->page; + + VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; +} + /* * Page flags policies wrt compound pages * @@ -224,6 +343,9 @@ static inline void page_init_poison(struct page *page, size_t size) * * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. + * + * PF_SECOND: + * the page flag is stored in the first tail page. */ #define PF_POISONED_CHECK(page) ({ \ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ @@ -239,37 +361,68 @@ static inline void page_init_poison(struct page *page, size_t size) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ PF_POISONED_CHECK(page); }) +#define PF_SECOND(page, enforce) ({ \ + VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ + PF_POISONED_CHECK(&page[1]); }) + +/* Which page is the flag stored in */ +#define FOLIO_PF_ANY 0 +#define FOLIO_PF_HEAD 0 +#define FOLIO_PF_ONLY_HEAD 0 +#define FOLIO_PF_NO_TAIL 0 +#define FOLIO_PF_NO_COMPOUND 0 +#define FOLIO_PF_SECOND 1 /* * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname, policy) \ +static __always_inline bool folio_test_##lname(struct folio *folio) \ +{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int Page##uname(struct page *page) \ - { return test_bit(PG_##lname, &policy(page, 0)->flags); } +{ return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_set_##lname(struct folio *folio) \ +{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void SetPage##uname(struct page *page) \ - { set_bit(PG_##lname, &policy(page, 1)->flags); } +{ set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void folio_clear_##lname(struct folio *folio) \ +{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void ClearPage##uname(struct page *page) \ - { clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_set_##lname(struct folio *folio) \ +{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __SetPage##uname(struct page *page) \ - { __set_bit(PG_##lname, &policy(page, 1)->flags); } +{ __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ +static __always_inline \ +void __folio_clear_##lname(struct folio *folio) \ +{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline void __ClearPage##uname(struct page *page) \ - { __clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_set_##lname(struct folio *folio) \ +{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestSetPage##uname(struct page *page) \ - { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ +static __always_inline \ +bool folio_test_clear_##lname(struct folio *folio) \ +{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \ static __always_inline int TestClearPage##uname(struct page *page) \ - { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } +{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ TESTPAGEFLAG(uname, lname, policy) \ @@ -285,32 +438,40 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) -#define TESTPAGEFLAG_FALSE(uname) \ +#define TESTPAGEFLAG_FALSE(uname, lname) \ +static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ static inline int Page##uname(const struct page *page) { return 0; } -#define SETPAGEFLAG_NOOP(uname) \ +#define SETPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_set_##lname(struct folio *folio) { } \ static inline void SetPage##uname(struct page *page) { } -#define CLEARPAGEFLAG_NOOP(uname) \ +#define CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void folio_clear_##lname(struct folio *folio) { } \ static inline void ClearPage##uname(struct page *page) { } -#define __CLEARPAGEFLAG_NOOP(uname) \ +#define __CLEARPAGEFLAG_NOOP(uname, lname) \ +static inline void __folio_clear_##lname(struct folio *folio) { } \ static inline void __ClearPage##uname(struct page *page) { } -#define TESTSETFLAG_FALSE(uname) \ +#define TESTSETFLAG_FALSE(uname, lname) \ +static inline bool folio_test_set_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestSetPage##uname(struct page *page) { return 0; } -#define TESTCLEARFLAG_FALSE(uname) \ +#define TESTCLEARFLAG_FALSE(uname, lname) \ +static inline bool folio_test_clear_##lname(struct folio *folio) \ +{ return 0; } \ static inline int TestClearPage##uname(struct page *page) { return 0; } -#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname) \ - SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname) +#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ + SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname) -#define TESTSCFLAG_FALSE(uname) \ - TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) +#define TESTSCFLAG_FALSE(uname, lname) \ + TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname) __PAGEFLAG(Locked, locked, PF_NO_TAIL) -PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) +PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) @@ -318,6 +479,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD) PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) + TESTCLEARFLAG(LRU, lru, PF_HEAD) PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) TESTCLEARFLAG(Active, active, PF_HEAD) PAGEFLAG(Workingset, workingset, PF_HEAD) @@ -344,10 +506,9 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) /* * Private page markings that may be used by the filesystem that owns the page * for its own purposes. - * - PG_private and PG_private_2 cause releasepage() and co to be invoked + * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ -PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY) - __CLEARPAGEFLAG(Private, private, PF_ANY) +PAGEFLAG(Private, private, PF_ANY) PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) @@ -363,8 +524,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND) +PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) + TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) #ifdef CONFIG_HIGHMEM /* @@ -373,22 +534,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND) */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else -PAGEFLAG_FALSE(HighMem) +PAGEFLAG_FALSE(HighMem, highmem) #endif #ifdef CONFIG_SWAP -static __always_inline int PageSwapCache(struct page *page) +static __always_inline bool folio_test_swapcache(struct folio *folio) { -#ifdef CONFIG_THP_SWAP - page = compound_head(page); -#endif - return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags); + return folio_test_swapbacked(folio) && + test_bit(PG_swapcache, folio_flags(folio, 0)); +} +static __always_inline bool PageSwapCache(struct page *page) +{ + return folio_test_swapcache(page_folio(page)); } + SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) #else -PAGEFLAG_FALSE(SwapCache) +PAGEFLAG_FALSE(SwapCache, swapcache) #endif PAGEFLAG(Unevictable, unevictable, PF_HEAD) @@ -400,37 +564,57 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) #else -PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked) - TESTSCFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) + TESTSCFLAG_FALSE(Mlocked, mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) #else -PAGEFLAG_FALSE(Uncached) +PAGEFLAG_FALSE(Uncached, uncached) #endif #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) -extern bool set_hwpoison_free_buddy_page(struct page *page); +#define MAGIC_HWPOISON 0x48575053U /* HWPS */ +extern void SetPageHWPoisonTakenOff(struct page *page); +extern void ClearPageHWPoisonTakenOff(struct page *page); +extern bool take_page_off_buddy(struct page *page); +extern bool put_page_back_buddy(struct page *page); #else -PAGEFLAG_FALSE(HWPoison) -static inline bool set_hwpoison_free_buddy_page(struct page *page) -{ - return 0; -} +PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) TESTPAGEFLAG(Young, young, PF_ANY) SETPAGEFLAG(Young, young, PF_ANY) TESTCLEARFLAG(Young, young, PF_ANY) PAGEFLAG(Idle, idle, PF_ANY) #endif +#ifdef CONFIG_KASAN_HW_TAGS +PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) +#else +PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison) +#endif + +/* + * PageReported() is used to track reported free pages within the Buddy + * allocator. We can use the non-atomic version of the test and set + * operations as both should be shielded with the zone lock to prevent + * any possible races on the setting or clearing of the bit. + */ +__PAGEFLAG(Reported, reported, PF_NO_COMPOUND) + +#ifdef CONFIG_MEMORY_HOTPLUG +PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) +#else +PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) +#endif + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; @@ -442,7 +626,7 @@ PAGEFLAG(Idle, idle, PF_ANY) * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points a struct address_space. + * page and then page->mapping points to a struct movable_operations. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" @@ -453,15 +637,36 @@ PAGEFLAG(Idle, idle, PF_ANY) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) +/* + * Different with flags above, this flag is used only for fsdax mode. It + * indicates that this page->mapping is now under reflink case. + */ +#define PAGE_MAPPING_DAX_COW 0x1 + +static __always_inline bool folio_mapping_flags(struct folio *folio) +{ + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; +} + static __always_inline int PageMappingFlags(struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0; } -static __always_inline int PageAnon(struct page *page) +static __always_inline bool folio_test_anon(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; + return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; +} + +static __always_inline bool PageAnon(struct page *page) +{ + return folio_test_anon(page_folio(page)); +} + +static __always_inline bool __folio_test_movable(const struct folio *folio) +{ + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == + PAGE_MAPPING_MOVABLE; } static __always_inline int __PageMovable(struct page *page) @@ -477,30 +682,42 @@ static __always_inline int __PageMovable(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static __always_inline int PageKsm(struct page *page) +static __always_inline bool folio_test_ksm(struct folio *folio) { - page = compound_head(page); - return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == + return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } + +static __always_inline bool PageKsm(struct page *page) +{ + return folio_test_ksm(page_folio(page)); +} #else -TESTPAGEFLAG_FALSE(Ksm) +TESTPAGEFLAG_FALSE(Ksm, ksm) #endif u64 stable_page_flags(struct page *page); -static inline int PageUptodate(struct page *page) +/** + * folio_test_uptodate - Is this folio up to date? + * @folio: The folio. + * + * The uptodate flag is set on a folio when every byte in the folio is + * at least as new as the corresponding bytes on storage. Anonymous + * and CoW folios are always uptodate. If the folio is not uptodate, + * some of the bytes in it may be; see the is_partially_uptodate() + * address_space operation. + */ +static inline bool folio_test_uptodate(struct folio *folio) { - int ret; - page = compound_head(page); - ret = test_bit(PG_uptodate, &(page)->flags); + bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); /* - * Must ensure that the data we read out of the page is loaded - * _after_ we've loaded page->flags to check for PageUptodate. - * We can skip the barrier if the page is not uptodate, because + * Must ensure that the data we read out of the folio is loaded + * _after_ we've loaded folio->flags to check the uptodate bit. + * We can skip the barrier if the folio is not uptodate, because * we wouldn't be reading anything from it. * - * See SetPageUptodate() for the other side of the story. + * See folio_mark_uptodate() for the other side of the story. */ if (ret) smp_rmb(); @@ -508,46 +725,83 @@ static inline int PageUptodate(struct page *page) return ret; } -static __always_inline void __SetPageUptodate(struct page *page) +static inline int PageUptodate(struct page *page) +{ + return folio_test_uptodate(page_folio(page)); +} + +static __always_inline void __folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); - __set_bit(PG_uptodate, &page->flags); + __set_bit(PG_uptodate, folio_flags(folio, 0)); } -static __always_inline void SetPageUptodate(struct page *page) +static __always_inline void folio_mark_uptodate(struct folio *folio) { - VM_BUG_ON_PAGE(PageTail(page), page); /* * Memory barrier must be issued before setting the PG_uptodate bit, - * so that all previous stores issued in order to bring the page - * uptodate are actually visible before PageUptodate becomes true. + * so that all previous stores issued in order to bring the folio + * uptodate are actually visible before folio_test_uptodate becomes true. */ smp_wmb(); - set_bit(PG_uptodate, &page->flags); + set_bit(PG_uptodate, folio_flags(folio, 0)); +} + +static __always_inline void __SetPageUptodate(struct page *page) +{ + __folio_mark_uptodate((struct folio *)page); +} + +static __always_inline void SetPageUptodate(struct page *page) +{ + folio_mark_uptodate((struct folio *)page); } CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL) -int test_clear_page_writeback(struct page *page); -int __test_set_page_writeback(struct page *page, bool keep_write); +bool __folio_start_writeback(struct folio *folio, bool keep_write); +bool set_page_writeback(struct page *page); -#define test_set_page_writeback(page) \ - __test_set_page_writeback(page, false) -#define test_set_page_writeback_keepwrite(page) \ - __test_set_page_writeback(page, true) +#define folio_start_writeback(folio) \ + __folio_start_writeback(folio, false) +#define folio_start_writeback_keepwrite(folio) \ + __folio_start_writeback(folio, true) -static inline void set_page_writeback(struct page *page) +static inline void set_page_writeback_keepwrite(struct page *page) { - test_set_page_writeback(page); + folio_start_writeback_keepwrite(page_folio(page)); } -static inline void set_page_writeback_keepwrite(struct page *page) +static inline bool test_set_page_writeback(struct page *page) +{ + return set_page_writeback(page); +} + +static __always_inline bool folio_test_head(struct folio *folio) { - test_set_page_writeback_keepwrite(page); + return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY)); } -__PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) +static __always_inline int PageHead(struct page *page) +{ + PF_POISONED_CHECK(page); + return test_bit(PG_head, &page->flags) && !page_is_fake_head(page); +} + +__SETPAGEFLAG(Head, head, PF_ANY) +__CLEARPAGEFLAG(Head, head, PF_ANY) +CLEARPAGEFLAG(Head, head, PF_ANY) + +/** + * folio_test_large() - Does this folio contain more than one page? + * @folio: The folio to test. + * + * Return: True if the folio is larger than one page. + */ +static inline bool folio_test_large(struct folio *folio) +{ + return folio_test_head(folio); +} static __always_inline void set_compound_head(struct page *page, struct page *head) { @@ -572,18 +826,15 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); -bool page_huge_active(struct page *page); -#else -TESTPAGEFLAG_FALSE(Huge) -TESTPAGEFLAG_FALSE(HeadHuge) - -static inline bool page_huge_active(struct page *page) +static inline bool folio_test_hugetlb(struct folio *folio) { - return 0; + return PageHeadHuge(&folio->page); } +#else +TESTPAGEFLAG_FALSE(Huge, hugetlb) +TESTPAGEFLAG_FALSE(HeadHuge, headhuge) #endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -599,6 +850,11 @@ static inline int PageTransHuge(struct page *page) return PageHead(page); } +static inline bool folio_test_transhuge(struct folio *folio) +{ + return folio_test_head(folio); +} + /* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -610,43 +866,6 @@ static inline int PageTransCompound(struct page *page) } /* - * PageTransCompoundMap is the same as PageTransCompound, but it also - * guarantees the primary MMU has the entire compound page mapped - * through pmd_trans_huge, which in turn guarantees the secondary MMUs - * can also map the entire compound page. This allows the secondary - * MMUs to call get_user_pages() only once for each compound page and - * to immediately map the entire compound page with a single secondary - * MMU fault. If there will be a pmd split later, the secondary MMUs - * will get an update through the MMU notifier invalidation through - * split_huge_pmd(). - * - * Unlike PageTransCompound, this is safe to be called only while - * split_huge_pmd() cannot run from under us, like if protected by the - * MMU notifier, otherwise it may result in page->_mapcount check false - * positives. - * - * We have to treat page cache THP differently since every subpage of it - * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE - * mapped in the current process so comparing subpage's _mapcount to - * compound_mapcount to filter out PTE mapped case. - */ -static inline int PageTransCompoundMap(struct page *page) -{ - struct page *head; - - if (!PageTransCompound(page)) - return 0; - - if (PageAnon(page)) - return atomic_read(&page->_mapcount) < 0; - - head = compound_head(page); - /* File THP is PMD mapped and not PTE mapped */ - return atomic_read(&page->_mapcount) == - atomic_read(compound_mapcount_ptr(head)); -} - -/* * PageTransTail returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. @@ -669,44 +888,43 @@ static inline int PageTransTail(struct page *page) * * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap(). */ -static inline int PageDoubleMap(struct page *page) -{ - return PageHead(page) && test_bit(PG_double_map, &page[1].flags); -} - -static inline void SetPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - set_bit(PG_double_map, &page[1].flags); -} +PAGEFLAG(DoubleMap, double_map, PF_SECOND) + TESTSCFLAG(DoubleMap, double_map, PF_SECOND) +#else +TESTPAGEFLAG_FALSE(TransHuge, transhuge) +TESTPAGEFLAG_FALSE(TransCompound, transcompound) +TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) +TESTPAGEFLAG_FALSE(TransTail, transtail) +PAGEFLAG_FALSE(DoubleMap, double_map) + TESTSCFLAG_FALSE(DoubleMap, double_map) +#endif -static inline void ClearPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - clear_bit(PG_double_map, &page[1].flags); -} -static inline int TestSetPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - return test_and_set_bit(PG_double_map, &page[1].flags); -} +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the + * compound page. + * + * This flag is set by hwpoison handler. Cleared by THP split or free page. + */ +PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +#else +PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) +#endif -static inline int TestClearPageDoubleMap(struct page *page) +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) { - VM_BUG_ON_PAGE(!PageHead(page), page); - return test_and_clear_bit(PG_double_map, &page[1].flags); + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); } -#else -TESTPAGEFLAG_FALSE(TransHuge) -TESTPAGEFLAG_FALSE(TransCompound) -TESTPAGEFLAG_FALSE(TransCompoundMap) -TESTPAGEFLAG_FALSE(TransTail) -PAGEFLAG_FALSE(DoubleMap) - TESTSETFLAG_FALSE(DoubleMap) - TESTCLEARFLAG_FALSE(DoubleMap) -#endif - /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the @@ -721,9 +939,8 @@ PAGEFLAG_FALSE(DoubleMap) #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 -#define PG_kmemcg 0x00000200 -#define PG_table 0x00000400 -#define PG_guard 0x00000800 +#define PG_table 0x00000200 +#define PG_guard 0x00000400 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -761,14 +978,28 @@ PAGE_TYPE_OPS(Buddy, buddy) * not onlined when onlining the section). * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. + * + * If a driver wants to allow to offline unmovable PageOffline() pages without + * putting them back to the buddy, it can do so via the memory notifier by + * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the + * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() + * pages (now with a reference count of zero) are treated like free pages, + * allowing the containing memory block to get offlined. A driver that + * relies on this feature is aware that re-onlining the memory block will + * require to re-set the pages PageOffline() and not giving them to the + * buddy via online_page_callback_t. + * + * There are drivers that mark a page PageOffline() and expect there won't be + * any further access to page content. PFN walkers that read content of random + * pages should check PageOffline() and synchronize with such drivers using + * page_offline_freeze()/page_offline_thaw(). */ PAGE_TYPE_OPS(Offline, offline) -/* - * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on - * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. - */ -PAGE_TYPE_OPS(Kmemcg, kmemcg) +extern void page_offline_freeze(void); +extern void page_offline_thaw(void); +extern void page_offline_begin(void); +extern void page_offline_end(void); /* * Marks pages in use as page tables. @@ -782,34 +1013,34 @@ PAGE_TYPE_OPS(Guard, guard) extern bool is_free_buddy_page(struct page *page); -__PAGEFLAG(Isolated, isolated, PF_ANY); +PAGEFLAG(Isolated, isolated, PF_ANY); -/* - * If network-based swap is enabled, sl*b must keep track of whether pages - * were allocated from pfmemalloc reserves. - */ -static inline int PageSlabPfmemalloc(struct page *page) +static __always_inline int PageAnonExclusive(struct page *page) { - VM_BUG_ON_PAGE(!PageSlab(page), page); - return PageActive(page); + VM_BUG_ON_PGFLAGS(!PageAnon(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } -static inline void SetPageSlabPfmemalloc(struct page *page) +static __always_inline void SetPageAnonExclusive(struct page *page) { - VM_BUG_ON_PAGE(!PageSlab(page), page); - SetPageActive(page); + VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } -static inline void __ClearPageSlabPfmemalloc(struct page *page) +static __always_inline void ClearPageAnonExclusive(struct page *page) { - VM_BUG_ON_PAGE(!PageSlab(page), page); - __ClearPageActive(page); + VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } -static inline void ClearPageSlabPfmemalloc(struct page *page) +static __always_inline void __ClearPageAnonExclusive(struct page *page) { - VM_BUG_ON_PAGE(!PageSlab(page), page); - ClearPageActive(page); + VM_BUG_ON_PGFLAGS(!PageAnon(page), page); + VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); + __clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } #ifdef CONFIG_MMU @@ -820,25 +1051,25 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) /* * Flags checked when a page is freed. Pages being freed should not have - * these flags set. It they are, there is a problem. + * these flags set. If they are, there is a problem. */ #define PAGE_FLAGS_CHECK_AT_FREE \ (1UL << PG_lru | 1UL << PG_locked | \ 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) + 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have these flags set. It they are set, + * Pages being prepped should not have these flags set. If they are set, * there has been a kernel bug or struct page corruption. * * __PG_HWPOISON is exceptional because it needs to be kept beyond page's * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ - (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) + ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) @@ -854,11 +1085,17 @@ static inline int page_has_private(struct page *page) return !!(page->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_has_private(struct folio *folio) +{ + return page_has_private(&folio->page); +} + #undef PF_ANY #undef PF_HEAD #undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND +#undef PF_SECOND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ |