From ece014141cd4b49f2d763f28b19e417b84460560 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 07:29:47 -0400 Subject: mm/doc: Add documentation for folio_test_uptodate Move the PG_uptodate documentation to be documentation for folio_test_uptodate() and expand on it a little. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/page-flags.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5f14d581113..b3d353d537e2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -68,9 +68,6 @@ * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as * a result of MADV_FREE). * - * PG_uptodate tells whether the page's contents is valid. When a read - * completes, the page becomes uptodate, unless a disk I/O error happened. - * * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * @@ -615,6 +612,16 @@ TESTPAGEFLAG_FALSE(Ksm, ksm) u64 stable_page_flags(struct page *page); +/** + * folio_test_uptodate - Is this folio up to date? + * @folio: The folio. + * + * The uptodate flag is set on a folio when every byte in the folio is + * at least as new as the corresponding bytes on storage. Anonymous + * and CoW folios are always uptodate. If the folio is not uptodate, + * some of the bytes in it may be; see the is_partially_uptodate() + * address_space operation. + */ static inline bool folio_test_uptodate(struct folio *folio) { bool ret = test_bit(PG_uptodate, folio_flags(folio, 0)); -- cgit v1.2.3-59-g8ed1b From 10331795fb7991a39ebd0330fdb074cbd81fef48 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Dec 2021 15:24:51 -0500 Subject: pagevec: Add folio_batch The folio_batch is the same as the pagevec, except that it is typed to contain folios and not pages. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/pagevec.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7f3f19065a9f..c3fa616d7ae7 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -15,8 +15,10 @@ #define PAGEVEC_SIZE 15 struct page; +struct folio; struct address_space; +/* Layout must match folio_batch */ struct pagevec { unsigned char nr; bool percpu_pvec_drained; @@ -81,4 +83,71 @@ static inline void pagevec_release(struct pagevec *pvec) __pagevec_release(pvec); } +/** + * struct folio_batch - A collection of folios. + * + * The folio_batch is used to amortise the cost of retrieving and + * operating on a set of folios. The order of folios in the batch may be + * significant (eg delete_from_page_cache_batch()). Some users of the + * folio_batch store "exceptional" entries in it which can be removed + * by calling folio_batch_remove_exceptionals(). + */ +struct folio_batch { + unsigned char nr; + bool percpu_pvec_drained; + struct folio *folios[PAGEVEC_SIZE]; +}; + +/* Layout must match pagevec */ +static_assert(sizeof(struct pagevec) == sizeof(struct folio_batch)); +static_assert(offsetof(struct pagevec, pages) == + offsetof(struct folio_batch, folios)); + +/** + * folio_batch_init() - Initialise a batch of folios + * @fbatch: The folio batch. + * + * A freshly initialised folio_batch contains zero folios. + */ +static inline void folio_batch_init(struct folio_batch *fbatch) +{ + fbatch->nr = 0; +} + +static inline unsigned int folio_batch_count(struct folio_batch *fbatch) +{ + return fbatch->nr; +} + +static inline unsigned int fbatch_space(struct folio_batch *fbatch) +{ + return PAGEVEC_SIZE - fbatch->nr; +} + +/** + * folio_batch_add() - Add a folio to a batch. + * @fbatch: The folio batch. + * @folio: The folio to add. + * + * The folio is added to the end of the batch. + * The batch must have previously been initialised using folio_batch_init(). + * + * Return: The number of slots still available. + */ +static inline unsigned folio_batch_add(struct folio_batch *fbatch, + struct folio *folio) +{ + fbatch->folios[fbatch->nr++] = folio; + return fbatch_space(fbatch); +} + +static inline void folio_batch_release(struct folio_batch *fbatch) +{ + pagevec_release((struct pagevec *)fbatch); +} + +static inline void folio_batch_remove_exceptionals(struct folio_batch *fbatch) +{ + pagevec_remove_exceptionals((struct pagevec *)fbatch); +} #endif /* _LINUX_PAGEVEC_H */ -- cgit v1.2.3-59-g8ed1b From d9c19d32d86fa54934b632c4314beb067bf98378 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Oct 2021 10:39:06 -0400 Subject: iov_iter: Add copy_folio_to_iter() This wrapper around copy_page_to_iter() works because copy_page_to_iter() handles compound pages correctly. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/uio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 6350354f97e9..43321dbebba8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -7,6 +7,7 @@ #include #include +#include #include struct page; @@ -146,6 +147,12 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i) +{ + return copy_page_to_iter(&folio->page, offset, bytes, i); +} + static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { -- cgit v1.2.3-59-g8ed1b From 5bf34d7c7ffe773c3b3c1b6ebf39e0f34a2436ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Nov 2021 14:24:43 -0500 Subject: mm: Add folio_test_pmd_mappable() Add a predicate to determine if the folio might be mapped by a PMD entry. If CONFIG_TRANSPARENT_HUGEPAGE is disabled, we know it can't be, even if it's large enough. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/huge_mm.h | 14 ++++++++++++++ include/linux/mm.h | 42 +++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f280f33ff223..e4c18ba8d3bf 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -274,6 +274,15 @@ static inline int thp_nr_pages(struct page *page) return 1; } +/** + * folio_test_pmd_mappable - Can we map this folio with a PMD? + * @folio: The folio to test + */ +static inline bool folio_test_pmd_mappable(struct folio *folio) +{ + return folio_order(folio) >= HPAGE_PMD_ORDER; +} + struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, @@ -339,6 +348,11 @@ static inline int thp_nr_pages(struct page *page) return 1; } +static inline bool folio_test_pmd_mappable(struct folio *folio) +{ + return false; +} + static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { return false; diff --git a/include/linux/mm.h b/include/linux/mm.h index a7e4a9e7d807..72ca04f16711 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -714,6 +714,27 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +static inline unsigned int compound_order(struct page *page) +{ + if (!PageHead(page)) + return 0; + return page[1].compound_order; +} + +/** + * folio_order - The allocation order of a folio. + * @folio: The folio. + * + * A folio is composed of 2^order pages. See get_order() for the definition + * of order. + * + * Return: The order of the folio. + */ +static inline unsigned int folio_order(struct folio *folio) +{ + return compound_order(&folio->page); +} + #include /* @@ -906,27 +927,6 @@ static inline void destroy_compound_page(struct page *page) compound_page_dtors[page[1].compound_dtor](page); } -static inline unsigned int compound_order(struct page *page) -{ - if (!PageHead(page)) - return 0; - return page[1].compound_order; -} - -/** - * folio_order - The allocation order of a folio. - * @folio: The folio. - * - * A folio is composed of 2^order pages. See get_order() for the definition - * of order. - * - * Return: The order of the folio. - */ -static inline unsigned int folio_order(struct folio *folio) -{ - return compound_order(&folio->page); -} - static inline bool hpage_pincount_available(struct page *page) { /* -- cgit v1.2.3-59-g8ed1b From 9f2b04a25a41b1f41b3cead4f56854a4192ec5b0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Aug 2021 23:36:31 -0400 Subject: filemap: Add folio_put_wait_locked() Convert all three callers of put_and_wait_on_page_locked() to folio_put_wait_locked(). This shrinks the kernel overall by 19 bytes. filemap_update_page() shrinks by 19 bytes while __migration_entry_wait() is unchanged. folio_put_wait_locked() is 14 bytes smaller than put_and_wait_on_page_locked(), but pmd_migration_entry_wait() grows by 14 bytes. It removes the assumption from pmd_migration_entry_wait() that pages cannot be larger than a PMD (which is true today, but may be interesting to explore in the future). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 2 +- mm/filemap.c | 27 +++++++++++++++------------ mm/migrate.c | 21 ++++++++++----------- 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 605246452305..841f7ba62d7d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -868,7 +868,7 @@ static inline int wait_on_page_locked_killable(struct page *page) return folio_wait_locked_killable(page_folio(page)); } -int put_and_wait_on_page_locked(struct page *page, int state); +int folio_put_wait_locked(struct folio *folio, int state); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); diff --git a/mm/filemap.c b/mm/filemap.c index 39c4c46c6133..5dd3c6e39c9f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1259,10 +1259,10 @@ enum behavior { * __folio_lock() waiting on then setting PG_locked. */ SHARED, /* Hold ref to page and check the bit when woken, like - * wait_on_page_writeback() waiting on PG_writeback. + * folio_wait_writeback() waiting on PG_writeback. */ DROP, /* Drop ref to page before wait, no check when woken, - * like put_and_wait_on_page_locked() on PG_locked. + * like folio_put_wait_locked() on PG_locked. */ }; @@ -1439,22 +1439,21 @@ int folio_wait_bit_killable(struct folio *folio, int bit_nr) EXPORT_SYMBOL(folio_wait_bit_killable); /** - * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked - * @page: The page to wait for. + * folio_put_wait_locked - Drop a reference and wait for it to be unlocked + * @folio: The folio to wait for. * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc). * - * The caller should hold a reference on @page. They expect the page to + * The caller should hold a reference on @folio. They expect the page to * become unlocked relatively soon, but do not wish to hold up migration - * (for example) by holding the reference while waiting for the page to + * (for example) by holding the reference while waiting for the folio to * come unlocked. After this function returns, the caller should not - * dereference @page. + * dereference @folio. * - * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal. + * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal. */ -int put_and_wait_on_page_locked(struct page *page, int state) +int folio_put_wait_locked(struct folio *folio, int state) { - return folio_wait_bit_common(page_folio(page), PG_locked, state, - DROP); + return folio_wait_bit_common(folio, PG_locked, state, DROP); } /** @@ -2447,7 +2446,11 @@ static int filemap_update_page(struct kiocb *iocb, goto unlock_mapping; if (!(iocb->ki_flags & IOCB_WAITQ)) { filemap_invalidate_unlock_shared(mapping); - put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE); + /* + * This is where we usually end up waiting for a + * previously submitted readahead to finish. + */ + folio_put_wait_locked(folio, TASK_KILLABLE); return AOP_TRUNCATED_PAGE; } error = __folio_lock_async(folio, iocb->ki_waitq); diff --git a/mm/migrate.c b/mm/migrate.c index cf25b00f03c8..311638177536 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -291,7 +291,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, { pte_t pte; swp_entry_t entry; - struct page *page; + struct folio *folio; spin_lock(ptl); pte = *ptep; @@ -302,18 +302,17 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, if (!is_migration_entry(entry)) goto out; - page = pfn_swap_entry_to_page(entry); - page = compound_head(page); + folio = page_folio(pfn_swap_entry_to_page(entry)); /* * Once page cache replacement of page migration started, page_count - * is zero; but we must not call put_and_wait_on_page_locked() without - * a ref. Use get_page_unless_zero(), and just fault again if it fails. + * is zero; but we must not call folio_put_wait_locked() without + * a ref. Use folio_try_get(), and just fault again if it fails. */ - if (!get_page_unless_zero(page)) + if (!folio_try_get(folio)) goto out; pte_unmap_unlock(ptep, ptl); - put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); return; out: pte_unmap_unlock(ptep, ptl); @@ -338,16 +337,16 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl; - struct page *page; + struct folio *folio; ptl = pmd_lock(mm, pmd); if (!is_pmd_migration_entry(*pmd)) goto unlock; - page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd)); - if (!get_page_unless_zero(page)) + folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd))); + if (!folio_try_get(folio)) goto unlock; spin_unlock(ptl); - put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE); + folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); return; unlock: spin_unlock(ptl); -- cgit v1.2.3-59-g8ed1b From 621db4880d305bc37b343b1671e03b7eb5d61389 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 8 May 2021 20:04:05 -0400 Subject: filemap: Add filemap_unaccount_folio() Replace unaccount_page_cache_page() with filemap_unaccount_folio(). The bug handling path could be a bit more robust (eg taking into account the mapcounts of tail pages), but it's really never supposed to happen. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 5 ---- mm/filemap.c | 70 ++++++++++++++++++++++++------------------------- 2 files changed, 35 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 841f7ba62d7d..077b6f378666 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -884,11 +884,6 @@ static inline void __set_page_dirty(struct page *page, } void folio_account_cleaned(struct folio *folio, struct address_space *mapping, struct bdi_writeback *wb); -static inline void account_page_cleaned(struct page *page, - struct address_space *mapping, struct bdi_writeback *wb) -{ - return folio_account_cleaned(page_folio(page), mapping, wb); -} void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { diff --git a/mm/filemap.c b/mm/filemap.c index 38fb26e16b85..600b8c921a67 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -145,74 +145,74 @@ static void page_cache_delete(struct address_space *mapping, mapping->nrpages -= nr; } -static void unaccount_page_cache_page(struct address_space *mapping, - struct page *page) +static void filemap_unaccount_folio(struct address_space *mapping, + struct folio *folio) { - int nr; + long nr; /* * if we're uptodate, flush out into the cleancache, otherwise * invalidate any existing cleancache entries. We can't leave * stale data around in the cleancache once our page is gone */ - if (PageUptodate(page) && PageMappedToDisk(page)) - cleancache_put_page(page); + if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio)) + cleancache_put_page(&folio->page); else - cleancache_invalidate_page(mapping, page); + cleancache_invalidate_page(mapping, &folio->page); - VM_BUG_ON_PAGE(PageTail(page), page); - VM_BUG_ON_PAGE(page_mapped(page), page); - if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { + VM_BUG_ON_FOLIO(folio_mapped(folio), folio); + if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { int mapcount; pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", - current->comm, page_to_pfn(page)); - dump_page(page, "still mapped when deleted"); + current->comm, folio_pfn(folio)); + dump_page(&folio->page, "still mapped when deleted"); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); - mapcount = page_mapcount(page); + mapcount = page_mapcount(&folio->page); if (mapping_exiting(mapping) && - page_count(page) >= mapcount + 2) { + folio_ref_count(folio) >= mapcount + 2) { /* * All vmas have already been torn down, so it's - * a good bet that actually the page is unmapped, + * a good bet that actually the folio is unmapped, * and we'd prefer not to leak it: if we're wrong, * some other bad page check should catch it later. */ - page_mapcount_reset(page); - page_ref_sub(page, mapcount); + page_mapcount_reset(&folio->page); + folio_ref_sub(folio, mapcount); } } - /* hugetlb pages do not participate in page cache accounting. */ - if (PageHuge(page)) + /* hugetlb folios do not participate in page cache accounting. */ + if (folio_test_hugetlb(folio)) return; - nr = thp_nr_pages(page); + nr = folio_nr_pages(folio); - __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr); - if (PageSwapBacked(page)) { - __mod_lruvec_page_state(page, NR_SHMEM, -nr); - if (PageTransHuge(page)) - __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr); - } else if (PageTransHuge(page)) { - __mod_lruvec_page_state(page, NR_FILE_THPS, -nr); + __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr); + if (folio_test_swapbacked(folio)) { + __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr); + if (folio_test_pmd_mappable(folio)) + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); + } else if (folio_test_pmd_mappable(folio)) { + __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); } /* - * At this point page must be either written or cleaned by - * truncate. Dirty page here signals a bug and loss of + * At this point folio must be either written or cleaned by + * truncate. Dirty folio here signals a bug and loss of * unwritten data. * - * This fixes dirty accounting after removing the page entirely - * but leaves PageDirty set: it has no effect for truncated - * page and anyway will be cleared before returning page into + * This fixes dirty accounting after removing the folio entirely + * but leaves the dirty flag set: it has no effect for truncated + * folio and anyway will be cleared before returning folio to * buddy allocator. */ - if (WARN_ON_ONCE(PageDirty(page))) - account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); + if (WARN_ON_ONCE(folio_test_dirty(folio))) + folio_account_cleaned(folio, mapping, + inode_to_wb(mapping->host)); } /* @@ -227,7 +227,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) trace_mm_filemap_delete_from_page_cache(page); - unaccount_page_cache_page(mapping, page); + filemap_unaccount_folio(mapping, folio); page_cache_delete(mapping, folio, shadow); } @@ -348,7 +348,7 @@ void delete_from_page_cache_batch(struct address_space *mapping, for (i = 0; i < pagevec_count(pvec); i++) { trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); - unaccount_page_cache_page(mapping, pvec->pages[i]); + filemap_unaccount_folio(mapping, page_folio(pvec->pages[i])); } page_cache_delete_batch(mapping, pvec); xa_unlock_irq(&mapping->i_pages); -- cgit v1.2.3-59-g8ed1b From a0580c6f9babaf4413c8a7e2ab21d68e31f4c754 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 23 Jul 2021 09:29:46 -0400 Subject: filemap: Convert tracing of page cache operations to folio Pass the folio instead of a page. The page was already implicitly a folio as it accessed page->mapping directly. Add the order of the folio to the tracepoint, as this is important information. Also drop printing the address of the struct page as the pfn provides better information than the struct page address. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/trace/events/filemap.h | 32 +++++++++++++++++--------------- mm/filemap.c | 9 +++++---- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index c47b63db124e..46c89c1e460c 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -15,43 +15,45 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, - TP_PROTO(struct page *page), + TP_PROTO(struct folio *folio), - TP_ARGS(page), + TP_ARGS(folio), TP_STRUCT__entry( __field(unsigned long, pfn) __field(unsigned long, i_ino) __field(unsigned long, index) __field(dev_t, s_dev) + __field(unsigned char, order) ), TP_fast_assign( - __entry->pfn = page_to_pfn(page); - __entry->i_ino = page->mapping->host->i_ino; - __entry->index = page->index; - if (page->mapping->host->i_sb) - __entry->s_dev = page->mapping->host->i_sb->s_dev; + __entry->pfn = folio_pfn(folio); + __entry->i_ino = folio->mapping->host->i_ino; + __entry->index = folio->index; + if (folio->mapping->host->i_sb) + __entry->s_dev = folio->mapping->host->i_sb->s_dev; else - __entry->s_dev = page->mapping->host->i_rdev; + __entry->s_dev = folio->mapping->host->i_rdev; + __entry->order = folio_order(folio); ), - TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu", + TP_printk("dev %d:%d ino %lx pfn=0x%lx ofs=%lu order=%u", MAJOR(__entry->s_dev), MINOR(__entry->s_dev), __entry->i_ino, - pfn_to_page(__entry->pfn), __entry->pfn, - __entry->index << PAGE_SHIFT) + __entry->index << PAGE_SHIFT, + __entry->order) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache, - TP_PROTO(struct page *page), - TP_ARGS(page) + TP_PROTO(struct folio *folio), + TP_ARGS(folio) ); DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, - TP_PROTO(struct page *page), - TP_ARGS(page) + TP_PROTO(struct folio *folio), + TP_ARGS(folio) ); TRACE_EVENT(filemap_set_wb_err, diff --git a/mm/filemap.c b/mm/filemap.c index 600b8c921a67..bcdc8bb4d2c8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -225,7 +225,7 @@ void __delete_from_page_cache(struct page *page, void *shadow) struct folio *folio = page_folio(page); struct address_space *mapping = page->mapping; - trace_mm_filemap_delete_from_page_cache(page); + trace_mm_filemap_delete_from_page_cache(folio); filemap_unaccount_folio(mapping, folio); page_cache_delete(mapping, folio, shadow); @@ -346,9 +346,10 @@ void delete_from_page_cache_batch(struct address_space *mapping, spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); for (i = 0; i < pagevec_count(pvec); i++) { - trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); + struct folio *folio = page_folio(pvec->pages[i]); - filemap_unaccount_folio(mapping, page_folio(pvec->pages[i])); + trace_mm_filemap_delete_from_page_cache(folio); + filemap_unaccount_folio(mapping, folio); } page_cache_delete_batch(mapping, pvec); xa_unlock_irq(&mapping->i_pages); @@ -959,7 +960,7 @@ unlock: goto error; } - trace_mm_filemap_add_to_page_cache(&folio->page); + trace_mm_filemap_add_to_page_cache(folio); return 0; error: folio->mapping = NULL; -- cgit v1.2.3-59-g8ed1b From 452e9e6992fe058a650c81d01a9982e3faf10278 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 9 May 2021 09:33:42 -0400 Subject: filemap: Add filemap_remove_folio and __filemap_remove_folio Reimplement __delete_from_page_cache() as a wrapper around __filemap_remove_folio() and delete_from_page_cache() as a wrapper around filemap_remove_folio(). Remove the EXPORT_SYMBOL as delete_from_page_cache() was not used by any in-tree modules. Convert page_cache_free_page() into filemap_free_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 9 +++++++-- mm/filemap.c | 43 ++++++++++++++++++++----------------------- mm/folio-compat.c | 5 +++++ 3 files changed, 32 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 077b6f378666..3f26b191ede3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -930,8 +930,13 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); -extern void delete_from_page_cache(struct page *page); -extern void __delete_from_page_cache(struct page *page, void *shadow); +void filemap_remove_folio(struct folio *folio); +void delete_from_page_cache(struct page *page); +void __filemap_remove_folio(struct folio *folio, void *shadow); +static inline void __delete_from_page_cache(struct page *page, void *shadow) +{ + __filemap_remove_folio(page_folio(page), shadow); +} void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); diff --git a/mm/filemap.c b/mm/filemap.c index bcdc8bb4d2c8..4fe845b30f33 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -220,58 +220,55 @@ static void filemap_unaccount_folio(struct address_space *mapping, * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the i_pages lock. */ -void __delete_from_page_cache(struct page *page, void *shadow) +void __filemap_remove_folio(struct folio *folio, void *shadow) { - struct folio *folio = page_folio(page); - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; trace_mm_filemap_delete_from_page_cache(folio); - filemap_unaccount_folio(mapping, folio); page_cache_delete(mapping, folio, shadow); } -static void page_cache_free_page(struct address_space *mapping, - struct page *page) +static void filemap_free_folio(struct address_space *mapping, + struct folio *folio) { void (*freepage)(struct page *); freepage = mapping->a_ops->freepage; if (freepage) - freepage(page); + freepage(&folio->page); - if (PageTransHuge(page) && !PageHuge(page)) { - page_ref_sub(page, thp_nr_pages(page)); - VM_BUG_ON_PAGE(page_count(page) <= 0, page); + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) { + folio_ref_sub(folio, folio_nr_pages(folio)); + VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio); } else { - put_page(page); + folio_put(folio); } } /** - * delete_from_page_cache - delete page from page cache - * @page: the page which the kernel is trying to remove from page cache + * filemap_remove_folio - Remove folio from page cache. + * @folio: The folio. * - * This must be called only on pages that have been verified to be in the page - * cache and locked. It will never put the page into the free list, the caller - * has a reference on the page. + * This must be called only on folios that are locked and have been + * verified to be in the page cache. It will never put the folio into + * the free list because the caller has a reference on the page. */ -void delete_from_page_cache(struct page *page) +void filemap_remove_folio(struct folio *folio) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping = folio->mapping; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); - __delete_from_page_cache(page, NULL); + __filemap_remove_folio(folio, NULL); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); - page_cache_free_page(mapping, page); + filemap_free_folio(mapping, folio); } -EXPORT_SYMBOL(delete_from_page_cache); /* * page_cache_delete_batch - delete several pages from page cache @@ -358,7 +355,7 @@ void delete_from_page_cache_batch(struct address_space *mapping, spin_unlock(&mapping->host->i_lock); for (i = 0; i < pagevec_count(pvec); i++) - page_cache_free_page(mapping, pvec->pages[i]); + filemap_free_folio(mapping, page_folio(pvec->pages[i])); } int filemap_check_errors(struct address_space *mapping) diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 5b6ae1da314e..749a695b4217 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -140,3 +140,8 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, mapping_gfp_mask(mapping)); } EXPORT_SYMBOL(grab_cache_page_write_begin); + +void delete_from_page_cache(struct page *page) +{ + return filemap_remove_folio(page_folio(page)); +} -- cgit v1.2.3-59-g8ed1b From bb2e98b613a3c76c904dfa82eb4b86773817598b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Nov 2021 16:14:50 -0500 Subject: filemap: Remove thp_contains() This function is now unused, so delete it. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3f26b191ede3..8c2cad7f0c36 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -512,15 +512,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -/* Does this page contain this index? */ -static inline bool thp_contains(struct page *head, pgoff_t index) -{ - /* HugeTLBfs indexes the page cache in units of hpage_size */ - if (PageHuge(head)) - return head->index == index; - return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); -} - #define swapcache_index(folio) __page_file_index(&(folio)->page) /** -- cgit v1.2.3-59-g8ed1b From 7836d9990079ed611199819ccf487061b748193a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 27 May 2021 12:30:54 -0400 Subject: readahead: Convert page_cache_async_ra() to take a folio Using the folio here avoids checking whether it's a tail page. This patch mostly just enables some of the following patches. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagemap.h | 4 ++-- mm/readahead.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8c2cad7f0c36..30302be6977f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -993,7 +993,7 @@ struct readahead_control { void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); -void page_cache_async_ra(struct readahead_control *, struct page *, +void page_cache_async_ra(struct readahead_control *, struct folio *, unsigned long req_count); void readahead_expand(struct readahead_control *ractl, loff_t new_start, size_t new_len); @@ -1040,7 +1040,7 @@ void page_cache_async_readahead(struct address_space *mapping, struct page *page, pgoff_t index, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, index); - page_cache_async_ra(&ractl, page, req_count); + page_cache_async_ra(&ractl, page_folio(page), req_count); } static inline struct folio *__readahead_folio(struct readahead_control *ractl) diff --git a/mm/readahead.c b/mm/readahead.c index 6ae5693de28c..e48e78641772 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -581,7 +581,7 @@ void page_cache_sync_ra(struct readahead_control *ractl, EXPORT_SYMBOL_GPL(page_cache_sync_ra); void page_cache_async_ra(struct readahead_control *ractl, - struct page *page, unsigned long req_count) + struct folio *folio, unsigned long req_count) { /* no read-ahead */ if (!ractl->ra->ra_pages) @@ -590,10 +590,10 @@ void page_cache_async_ra(struct readahead_control *ractl, /* * Same bit is used for PG_readahead and PG_reclaim. */ - if (PageWriteback(page)) + if (folio_test_writeback(folio)) return; - ClearPageReadahead(page); + folio_clear_readahead(folio); /* * Defer asynchronous read-ahead on IO congestion. -- cgit v1.2.3-59-g8ed1b From 539a3322f208db478db88c4a76239476defce6b1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 16 Dec 2020 11:45:30 -0500 Subject: filemap: Add read_cache_folio and read_mapping_folio Reimplement read_cache_page() as a wrapper around read_cache_folio(). Saves over 400 bytes of text from do_read_cache_folio() which more than makes up for the extra 100 bytes of text added to the various wrapper functions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/pagemap.h | 12 +++++- mm/filemap.c | 97 ++++++++++++++++++++++++++----------------------- 2 files changed, 61 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 30302be6977f..7bef50ea5435 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -629,8 +629,10 @@ static inline struct page *grab_cache_page(struct address_space *mapping, return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } -extern struct page * read_cache_page(struct address_space *mapping, - pgoff_t index, filler_t *filler, void *data); +struct folio *read_cache_folio(struct address_space *, pgoff_t index, + filler_t *filler, void *data); +struct page *read_cache_page(struct address_space *, pgoff_t index, + filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern int read_cache_pages(struct address_space *mapping, @@ -642,6 +644,12 @@ static inline struct page *read_mapping_page(struct address_space *mapping, return read_cache_page(mapping, index, NULL, data); } +static inline struct folio *read_mapping_folio(struct address_space *mapping, + pgoff_t index, void *data) +{ + return read_cache_folio(mapping, index, NULL, data); +} + /* * Get index of the page within radix-tree (but not for hugetlb pages). * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) diff --git a/mm/filemap.c b/mm/filemap.c index fc0f1d9904d2..f98e084ffb31 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3418,35 +3418,20 @@ EXPORT_SYMBOL(filemap_page_mkwrite); EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_readonly_mmap); -static struct page *wait_on_page_read(struct page *page) +static struct folio *do_read_cache_folio(struct address_space *mapping, + pgoff_t index, filler_t filler, void *data, gfp_t gfp) { - if (!IS_ERR(page)) { - wait_on_page_locked(page); - if (!PageUptodate(page)) { - put_page(page); - page = ERR_PTR(-EIO); - } - } - return page; -} - -static struct page *do_read_cache_page(struct address_space *mapping, - pgoff_t index, - int (*filler)(void *, struct page *), - void *data, - gfp_t gfp) -{ - struct page *page; + struct folio *folio; int err; repeat: - page = find_get_page(mapping, index); - if (!page) { - page = __page_cache_alloc(gfp); - if (!page) + folio = filemap_get_folio(mapping, index); + if (!folio) { + folio = filemap_alloc_folio(gfp, 0); + if (!folio) return ERR_PTR(-ENOMEM); - err = add_to_page_cache_lru(page, mapping, index, gfp); + err = filemap_add_folio(mapping, folio, index, gfp); if (unlikely(err)) { - put_page(page); + folio_put(folio); if (err == -EEXIST) goto repeat; /* Presumably ENOMEM for xarray node */ @@ -3455,21 +3440,24 @@ repeat: filler: if (filler) - err = filler(data, page); + err = filler(data, &folio->page); else - err = mapping->a_ops->readpage(data, page); + err = mapping->a_ops->readpage(data, &folio->page); if (err < 0) { - put_page(page); + folio_put(folio); return ERR_PTR(err); } - page = wait_on_page_read(page); - if (IS_ERR(page)) - return page; + folio_wait_locked(folio); + if (!folio_test_uptodate(folio)) { + folio_put(folio); + return ERR_PTR(-EIO); + } + goto out; } - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) goto out; /* @@ -3503,23 +3491,23 @@ filler: * avoid spurious serialisations and wakeups when multiple processes * wait on the same page for IO to complete. */ - wait_on_page_locked(page); - if (PageUptodate(page)) + folio_wait_locked(folio); + if (folio_test_uptodate(folio)) goto out; /* Distinguish between all the cases under the safety of the lock */ - lock_page(page); + folio_lock(folio); /* Case c or d, restart the operation */ - if (!page->mapping) { - unlock_page(page); - put_page(page); + if (!folio->mapping) { + folio_unlock(folio); + folio_put(folio); goto repeat; } /* Someone else locked and filled the page in a very small window */ - if (PageUptodate(page)) { - unlock_page(page); + if (folio_test_uptodate(folio)) { + folio_unlock(folio); goto out; } @@ -3529,16 +3517,16 @@ filler: * Clear page error before actual read, PG_error will be * set again if read page fails. */ - ClearPageError(page); + folio_clear_error(folio); goto filler; out: - mark_page_accessed(page); - return page; + folio_mark_accessed(folio); + return folio; } /** - * read_cache_page - read into page cache, fill it if needed + * read_cache_folio - read into page cache, fill it if needed * @mapping: the page's address_space * @index: the page index * @filler: function to perform the read @@ -3553,10 +3541,27 @@ out: * * Return: up to date page on success, ERR_PTR() on failure. */ +struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index, + filler_t filler, void *data) +{ + return do_read_cache_folio(mapping, index, filler, data, + mapping_gfp_mask(mapping)); +} +EXPORT_SYMBOL(read_cache_folio); + +static struct page *do_read_cache_page(struct address_space *mapping, + pgoff_t index, filler_t *filler, void *data, gfp_t gfp) +{ + struct folio *folio; + + folio = do_read_cache_folio(mapping, index, filler, data, gfp); + if (IS_ERR(folio)) + return &folio->page; + return folio_file_page(folio, index); +} + struct page *read_cache_page(struct address_space *mapping, - pgoff_t index, - int (*filler)(void *, struct page *), - void *data) + pgoff_t index, filler_t *filler, void *data) { return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); -- cgit v1.2.3-59-g8ed1b From 82c50f8b443359ec99348cd9b1289f55cd47779d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 28 Jul 2021 15:14:48 -0400 Subject: filemap: Add filemap_release_folio() Reimplement try_to_release_page() as a wrapper around filemap_release_folio(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/mm.h | 1 - include/linux/pagemap.h | 2 ++ mm/filemap.c | 39 +++++++++++++++++++-------------------- mm/folio-compat.c | 6 ++++++ 4 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 72ca04f16711..145f045b0ddc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1970,7 +1970,6 @@ int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); -extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7bef50ea5435..eb6e58e106c8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -939,6 +939,8 @@ static inline void __delete_from_page_cache(struct page *page, void *shadow) void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); +int try_to_release_page(struct page *page, gfp_t gfp); +bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); diff --git a/mm/filemap.c b/mm/filemap.c index bbe982e64e62..4c39e09a2f51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3889,33 +3889,32 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) EXPORT_SYMBOL(generic_file_write_iter); /** - * try_to_release_page() - release old fs-specific metadata on a page + * filemap_release_folio() - Release fs-specific metadata on a folio. + * @folio: The folio which the kernel is trying to free. + * @gfp: Memory allocation flags (and I/O mode). * - * @page: the page which the kernel is trying to free - * @gfp_mask: memory allocation flags (and I/O mode) + * The address_space is trying to release any data attached to a folio + * (presumably at folio->private). * - * The address_space is to try to release any data against the page - * (presumably at page->private). + * This will also be called if the private_2 flag is set on a page, + * indicating that the folio has other metadata associated with it. * - * This may also be called if PG_fscache is set on a page, indicating that the - * page is known to the local caching routines. + * The @gfp argument specifies whether I/O may be performed to release + * this page (__GFP_IO), and whether the call may block + * (__GFP_RECLAIM & __GFP_FS). * - * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). - * - * Return: %1 if the release was successful, otherwise return zero. + * Return: %true if the release was successful, otherwise %false. */ -int try_to_release_page(struct page *page, gfp_t gfp_mask) +bool filemap_release_folio(struct folio *folio, gfp_t gfp) { - struct address_space * const mapping = page->mapping; + struct address_space * const mapping = folio->mapping; - BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) - return 0; + BUG_ON(!folio_test_locked(folio)); + if (folio_test_writeback(folio)) + return false; if (mapping && mapping->a_ops->releasepage) - return mapping->a_ops->releasepage(page, gfp_mask); - return try_to_free_buffers(page); + return mapping->a_ops->releasepage(&folio->page, gfp); + return try_to_free_buffers(&folio->page); } - -EXPORT_SYMBOL(try_to_release_page); +EXPORT_SYMBOL(filemap_release_folio); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 749a695b4217..749555a232a8 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -145,3 +145,9 @@ void delete_from_page_cache(struct page *page) { return filemap_remove_folio(page_folio(page)); } + +int try_to_release_page(struct page *page, gfp_t gfp) +{ + return filemap_release_folio(page_folio(page), gfp); +} +EXPORT_SYMBOL(try_to_release_page); -- cgit v1.2.3-59-g8ed1b From 3506659e18a61ae525f3b9b4f5af23b4b149d4db Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 28 Nov 2021 14:53:35 -0500 Subject: mm: Add unmap_mapping_folio() Convert both callers of unmap_mapping_page() to call unmap_mapping_folio() instead. Also move zap_details from linux/mm.h to mm/memory.c Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/mm.h | 24 ------------------------ mm/internal.h | 4 +++- mm/memory.c | 49 +++++++++++++++++++++++++++++++++++-------------- mm/truncate.c | 4 ++-- 4 files changed, 40 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 145f045b0ddc..c9cdb26802fb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1825,28 +1825,6 @@ static inline bool can_do_mlock(void) { return false; } extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct address_space *zap_mapping; /* Check page->mapping if set */ - struct page *single_page; /* Locked page to be unmapped */ -}; - -/* - * We set details->zap_mappings when we want to unmap shared but keep private - * pages. Return true if skip zapping this page, false otherwise. - */ -static inline bool -zap_skip_check_mapping(struct zap_details *details, struct page *page) -{ - if (!details || !page) - return false; - - return details->zap_mapping && - (details->zap_mapping != page_rmapping(page)); -} - struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -1892,7 +1870,6 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); -void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@ -1913,7 +1890,6 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, BUG(); return -EFAULT; } -static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, diff --git a/mm/internal.h b/mm/internal.h index 3b79a5c9427a..1ca93c6cb18c 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -74,6 +74,7 @@ static inline bool can_madv_lru_vma(struct vm_area_struct *vma) return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)); } +struct zap_details; void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -388,6 +389,7 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma); #ifdef CONFIG_MMU +void unmap_mapping_folio(struct folio *folio); extern long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked); extern long faultin_vma_page_range(struct vm_area_struct *vma, @@ -491,8 +493,8 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, } return fpin; } - #else /* !CONFIG_MMU */ +static inline void unmap_mapping_folio(struct folio *folio) { } static inline void clear_page_mlock(struct page *page) { } static inline void mlock_vma_page(struct page *page) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) diff --git a/mm/memory.c b/mm/memory.c index 8f1de811a1dc..23f2f1300d42 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1304,6 +1304,28 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct address_space *zap_mapping; /* Check page->mapping if set */ + struct folio *single_folio; /* Locked folio to be unmapped */ +}; + +/* + * We set details->zap_mapping when we want to unmap shared but keep private + * pages. Return true if skip zapping this page, false otherwise. + */ +static inline bool +zap_skip_check_mapping(struct zap_details *details, struct page *page) +{ + if (!details || !page) + return false; + + return details->zap_mapping && + (details->zap_mapping != page_rmapping(page)); +} + static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, @@ -1443,8 +1465,8 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ - } else if (details && details->single_page && - PageTransCompound(details->single_page) && + } else if (details && details->single_folio && + folio_test_pmd_mappable(details->single_folio) && next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { spinlock_t *ptl = pmd_lock(tlb->mm, pmd); /* @@ -3332,31 +3354,30 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, } /** - * unmap_mapping_page() - Unmap single page from processes. - * @page: The locked page to be unmapped. + * unmap_mapping_folio() - Unmap single folio from processes. + * @folio: The locked folio to be unmapped. * - * Unmap this page from any userspace process which still has it mmaped. + * Unmap this folio from any userspace process which still has it mmaped. * Typically, for efficiency, the range of nearby pages has already been * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once - * truncation or invalidation holds the lock on a page, it may find that - * the page has been remapped again: and then uses unmap_mapping_page() + * truncation or invalidation holds the lock on a folio, it may find that + * the page has been remapped again: and then uses unmap_mapping_folio() * to unmap it finally. */ -void unmap_mapping_page(struct page *page) +void unmap_mapping_folio(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct zap_details details = { }; pgoff_t first_index; pgoff_t last_index; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageTail(page)); + VM_BUG_ON(!folio_test_locked(folio)); - first_index = page->index; - last_index = page->index + thp_nr_pages(page) - 1; + first_index = folio->index; + last_index = folio->index + folio_nr_pages(folio) - 1; details.zap_mapping = mapping; - details.single_page = page; + details.single_folio = folio; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) diff --git a/mm/truncate.c b/mm/truncate.c index ab86b07c1e9c..c98feea75a10 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -180,7 +180,7 @@ void do_invalidatepage(struct page *page, unsigned int offset, static void truncate_cleanup_folio(struct folio *folio) { if (folio_mapped(folio)) - unmap_mapping_page(&folio->page); + unmap_mapping_folio(folio); if (folio_has_private(folio)) do_invalidatepage(&folio->page, 0, folio_size(folio)); @@ -670,7 +670,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, wait_on_page_writeback(page); if (page_mapped(page)) - unmap_mapping_page(page); + unmap_mapping_folio(page_folio(page)); BUG_ON(page_mapped(page)); ret2 = do_launder_page(mapping, page); -- cgit v1.2.3-59-g8ed1b From 1e84a3d997b74c33491899e31d48774f252213ab Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 2 Dec 2021 16:01:55 -0500 Subject: truncate,shmem: Add truncate_inode_folio() Convert all callers of truncate_inode_page() to call truncate_inode_folio() instead, and move the declaration to mm/internal.h. Move the assertion that the caller is not passing in a tail page to generic_error_remove_page(). We can't entirely remove the struct page from the callers yet because the page pointer in the pvec might be a shadow/dax/swap entry instead of actually a page. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/mm.h | 1 - mm/internal.h | 1 + mm/shmem.c | 5 +++-- mm/truncate.c | 23 ++++++++++++----------- 4 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c9cdb26802fb..d8b7d7ed14dd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1859,7 +1859,6 @@ extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); -int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); int invalidate_inode_page(struct page *page); diff --git a/mm/internal.h b/mm/internal.h index 1ca93c6cb18c..f9967b0be8bf 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -92,6 +92,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping, unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct pagevec *pvec, pgoff_t *indices); +int truncate_inode_folio(struct address_space *mapping, struct folio *folio); /** * folio_evictable - Test whether a folio is evictable. diff --git a/mm/shmem.c b/mm/shmem.c index 40da9075374b..dbef008fb6e5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -950,7 +950,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index += folio_nr_pages(folio) - 1; if (!unfalloc || !folio_test_uptodate(folio)) - truncate_inode_page(mapping, &folio->page); + truncate_inode_folio(mapping, folio); folio_unlock(folio); } pagevec_remove_exceptionals(&pvec); @@ -1027,7 +1027,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } VM_BUG_ON_PAGE(PageWriteback(page), page); if (shmem_punch_compound(page, start, end)) - truncate_inode_page(mapping, page); + truncate_inode_folio(mapping, + page_folio(page)); else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Wipe the page and don't get stuck */ clear_highpage(page); diff --git a/mm/truncate.c b/mm/truncate.c index c98feea75a10..0000424fc56b 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -218,12 +218,9 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) return ret; } -int truncate_inode_page(struct address_space *mapping, struct page *page) +int truncate_inode_folio(struct address_space *mapping, struct folio *folio) { - struct folio *folio = page_folio(page); - VM_BUG_ON_PAGE(PageTail(page), page); - - if (page->mapping != mapping) + if (folio->mapping != mapping) return -EIO; truncate_cleanup_folio(folio); @@ -236,6 +233,8 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) */ int generic_error_remove_page(struct address_space *mapping, struct page *page) { + VM_BUG_ON_PAGE(PageTail(page), page); + if (!mapping) return -EINVAL; /* @@ -244,7 +243,7 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page) */ if (!S_ISREG(mapping->host->i_mode)) return -EIO; - return truncate_inode_page(mapping, page); + return truncate_inode_folio(mapping, page_folio(page)); } EXPORT_SYMBOL(generic_error_remove_page); @@ -395,18 +394,20 @@ void truncate_inode_pages_range(struct address_space *mapping, for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; + struct folio *folio; /* We rely upon deletion not changing page->index */ index = indices[i]; if (xa_is_value(page)) continue; + folio = page_folio(page); - lock_page(page); - WARN_ON(page_to_index(page) != index); - wait_on_page_writeback(page); - truncate_inode_page(mapping, page); - unlock_page(page); + folio_lock(folio); + VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); + folio_wait_writeback(folio); + truncate_inode_folio(mapping, folio); + folio_unlock(folio); } truncate_exceptional_pvec_entries(mapping, &pvec, indices); pagevec_release(&pvec); -- cgit v1.2.3-59-g8ed1b From 0e499ed3d7a216706e02eeded562627d3e69dcfd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 1 Sep 2020 23:17:50 -0400 Subject: filemap: Return only folios from find_get_entries() The callers have all been converted to work on folios, so convert find_get_entries() to return a batch of folios instead of pages. We also now return multiple large folios in a single call. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jan Kara Reviewed-by: William Kucharski Reviewed-by: Christoph Hellwig --- include/linux/pagemap.h | 2 -- mm/filemap.c | 43 +++++++++++-------------------------------- mm/internal.h | 4 ++++ mm/shmem.c | 36 ++++++++++++++++++++---------------- mm/truncate.c | 43 ++++++++++++++++++++++++------------------- 5 files changed, 59 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index eb6e58e106c8..d2259a1da51c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -592,8 +592,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } -unsigned find_get_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, pgoff_t end, unsigned int nr_pages, struct page **pages); diff --git a/mm/filemap.c b/mm/filemap.c index aefa6082b81b..021214fd5354 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2015,57 +2015,36 @@ reset: * @mapping: The address_space to search * @start: The starting page cache index * @end: The final page index (inclusive). - * @pvec: Where the resulting entries are placed. + * @fbatch: Where the resulting entries are placed. * @indices: The cache indices corresponding to the entries in @entries * * find_get_entries() will search for and return a batch of entries in - * the mapping. The entries are placed in @pvec. find_get_entries() - * takes a reference on any actual pages it returns. + * the mapping. The entries are placed in @fbatch. find_get_entries() + * takes a reference on any actual folios it returns. * - * The search returns a group of mapping-contiguous page cache entries - * with ascending indexes. There may be holes in the indices due to - * not-present pages. + * The entries have ascending indexes. The indices may not be consecutive + * due to not-present entries or large folios. * - * Any shadow entries of evicted pages, or swap entries from + * Any shadow entries of evicted folios, or swap entries from * shmem/tmpfs, are included in the returned array. * - * If it finds a Transparent Huge Page, head or tail, find_get_entries() - * stops at that page: the caller is likely to have a better way to handle - * the compound page as a whole, and then skip its extent, than repeatedly - * calling find_get_entries() to return all its tails. - * - * Return: the number of pages and shadow entries which were found. + * Return: The number of entries which were found. */ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct pagevec *pvec, pgoff_t *indices) + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) { XA_STATE(xas, &mapping->i_pages, start); struct folio *folio; - unsigned int ret = 0; - unsigned nr_entries = PAGEVEC_SIZE; rcu_read_lock(); while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) { - struct page *page = &folio->page; - /* - * Terminate early on finding a THP, to allow the caller to - * handle it all at once; but continue if this is hugetlbfs. - */ - if (!xa_is_value(folio) && folio_test_large(folio) && - !folio_test_hugetlb(folio)) { - page = folio_file_page(folio, xas.xa_index); - nr_entries = ret + 1; - } - - indices[ret] = xas.xa_index; - pvec->pages[ret] = page; - if (++ret == nr_entries) + indices[fbatch->nr] = xas.xa_index; + if (!folio_batch_add(fbatch, folio)) break; } rcu_read_unlock(); - pvec->nr = ret; - return ret; + return folio_batch_count(fbatch); } /** diff --git a/mm/internal.h b/mm/internal.h index e5f3ff3ae24e..07124e95e790 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -12,6 +12,8 @@ #include #include +struct folio_batch; + /* * The set of flags that only affect watermark checking and reclaim * behaviour. This is used by the MM to obey the caller constraints @@ -92,6 +94,8 @@ static inline void force_page_cache_readahead(struct address_space *mapping, unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct pagevec *pvec, pgoff_t *indices); +unsigned find_get_entries(struct address_space *mapping, pgoff_t start, + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); void filemap_free_folio(struct address_space *mapping, struct folio *folio); int truncate_inode_folio(struct address_space *mapping, struct folio *folio); diff --git a/mm/shmem.c b/mm/shmem.c index dbef008fb6e5..e909c163fb38 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -920,6 +920,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, unsigned int partial_start = lstart & (PAGE_SIZE - 1); unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); struct pagevec pvec; + struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; pgoff_t index; @@ -987,11 +988,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (start >= end) return; + folio_batch_init(&fbatch); index = start; while (index < end) { cond_resched(); - if (!find_get_entries(mapping, index, end - 1, &pvec, + if (!find_get_entries(mapping, index, end - 1, &fbatch, indices)) { /* If all gone or hole-punch or unfalloc, we're done */ if (index == start || end != -1) @@ -1000,14 +1002,14 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index = start; continue; } - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; index = indices[i]; - if (xa_is_value(page)) { + if (xa_is_value(folio)) { if (unfalloc) continue; - if (shmem_free_swap(mapping, index, page)) { + if (shmem_free_swap(mapping, index, folio)) { /* Swap was replaced by page: retry */ index--; break; @@ -1016,33 +1018,35 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, continue; } - lock_page(page); + folio_lock(folio); - if (!unfalloc || !PageUptodate(page)) { - if (page_mapping(page) != mapping) { + if (!unfalloc || !folio_test_uptodate(folio)) { + struct page *page = folio_file_page(folio, + index); + if (folio_mapping(folio) != mapping) { /* Page was replaced by swap: retry */ - unlock_page(page); + folio_unlock(folio); index--; break; } - VM_BUG_ON_PAGE(PageWriteback(page), page); + VM_BUG_ON_FOLIO(folio_test_writeback(folio), + folio); if (shmem_punch_compound(page, start, end)) - truncate_inode_folio(mapping, - page_folio(page)); + truncate_inode_folio(mapping, folio); else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Wipe the page and don't get stuck */ clear_highpage(page); flush_dcache_page(page); - set_page_dirty(page); + folio_mark_dirty(folio); if (index < round_up(start, HPAGE_PMD_NR)) start = index + 1; } } - unlock_page(page); + folio_unlock(folio); } - pagevec_remove_exceptionals(&pvec); - pagevec_release(&pvec); + folio_batch_remove_exceptionals(&fbatch); + folio_batch_release(&fbatch); index++; } diff --git a/mm/truncate.c b/mm/truncate.c index 5370094641d6..357af144df63 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -108,6 +108,13 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, pvec->nr = j; } +static void truncate_folio_batch_exceptionals(struct address_space *mapping, + struct folio_batch *fbatch, pgoff_t *indices) +{ + truncate_exceptional_pvec_entries(mapping, (struct pagevec *)fbatch, + indices); +} + /* * Invalidate exceptional entry if easily possible. This handles exceptional * entries for invalidate_inode_pages(). @@ -297,6 +304,7 @@ void truncate_inode_pages_range(struct address_space *mapping, unsigned int partial_start; /* inclusive */ unsigned int partial_end; /* exclusive */ struct pagevec pvec; + struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; int i; @@ -379,10 +387,11 @@ void truncate_inode_pages_range(struct address_space *mapping, if (start >= end) goto out; + folio_batch_init(&fbatch); index = start; for ( ; ; ) { cond_resched(); - if (!find_get_entries(mapping, index, end - 1, &pvec, + if (!find_get_entries(mapping, index, end - 1, &fbatch, indices)) { /* If all gone from start onwards, we're done */ if (index == start) @@ -392,16 +401,14 @@ void truncate_inode_pages_range(struct address_space *mapping, continue; } - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; - struct folio *folio; + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing page->index */ index = indices[i]; - if (xa_is_value(page)) + if (xa_is_value(folio)) continue; - folio = page_folio(page); folio_lock(folio); VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio); @@ -410,8 +417,8 @@ void truncate_inode_pages_range(struct address_space *mapping, folio_unlock(folio); index = folio_index(folio) + folio_nr_pages(folio) - 1; } - truncate_exceptional_pvec_entries(mapping, &pvec, indices); - pagevec_release(&pvec); + truncate_folio_batch_exceptionals(mapping, &fbatch, indices); + folio_batch_release(&fbatch); index++; } @@ -625,7 +632,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { pgoff_t indices[PAGEVEC_SIZE]; - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t index; int i; int ret = 0; @@ -635,23 +642,21 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (mapping_empty(mapping)) goto out; - pagevec_init(&pvec); + folio_batch_init(&fbatch); index = start; - while (find_get_entries(mapping, index, end, &pvec, indices)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; - struct folio *folio; + while (find_get_entries(mapping, index, end, &fbatch, indices)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ index = indices[i]; - if (xa_is_value(page)) { + if (xa_is_value(folio)) { if (!invalidate_exceptional_entry2(mapping, - index, page)) + index, folio)) ret = -EBUSY; continue; } - folio = page_folio(page); if (!did_range_unmap && folio_mapped(folio)) { /* @@ -684,8 +689,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, ret = ret2; folio_unlock(folio); } - pagevec_remove_exceptionals(&pvec); - pagevec_release(&pvec); + folio_batch_remove_exceptionals(&fbatch); + folio_batch_release(&fbatch); cond_resched(); index++; } -- cgit v1.2.3-59-g8ed1b From 51dcbdac28d4dde915f78adf08bb3fac87f516e9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Dec 2021 14:15:07 -0500 Subject: mm: Convert find_lock_entries() to use a folio_batch find_lock_entries() already only returned the head page of folios, so convert it to return a folio_batch instead of a pagevec. That cascades through converting truncate_inode_pages_range() to delete_from_page_cache_batch() and page_cache_delete_batch(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- fs/f2fs/f2fs.h | 2 ++ include/linux/pagemap.h | 4 +-- mm/filemap.c | 60 +++++++++++++++++++++---------------------- mm/internal.h | 2 +- mm/shmem.c | 14 +++++------ mm/truncate.c | 67 +++++++++++++++++++++---------------------------- 6 files changed, 69 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ce9fc9f13000..d0d603187171 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -28,6 +28,8 @@ #include #include +struct pagevec; + #ifdef CONFIG_F2FS_CHECK_FS #define f2fs_bug_on(sbi, condition) BUG_ON(condition) #else diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d2259a1da51c..6e038811f4c8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -16,7 +16,7 @@ #include /* for in_interrupt() */ #include -struct pagevec; +struct folio_batch; static inline bool mapping_empty(struct address_space *mapping) { @@ -936,7 +936,7 @@ static inline void __delete_from_page_cache(struct page *page, void *shadow) } void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec); + struct folio_batch *fbatch); int try_to_release_page(struct page *page, gfp_t gfp); bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, diff --git a/mm/filemap.c b/mm/filemap.c index 021214fd5354..9d3bae3e36c3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -270,30 +270,29 @@ void filemap_remove_folio(struct folio *folio) } /* - * page_cache_delete_batch - delete several pages from page cache - * @mapping: the mapping to which pages belong - * @pvec: pagevec with pages to delete + * page_cache_delete_batch - delete several folios from page cache + * @mapping: the mapping to which folios belong + * @fbatch: batch of folios to delete * - * The function walks over mapping->i_pages and removes pages passed in @pvec - * from the mapping. The function expects @pvec to be sorted by page index - * and is optimised for it to be dense. - * It tolerates holes in @pvec (mapping entries at those indices are not - * modified). The function expects only THP head pages to be present in the - * @pvec. + * The function walks over mapping->i_pages and removes folios passed in + * @fbatch from the mapping. The function expects @fbatch to be sorted + * by page index and is optimised for it to be dense. + * It tolerates holes in @fbatch (mapping entries at those indices are not + * modified). * * The function expects the i_pages lock to be held. */ static void page_cache_delete_batch(struct address_space *mapping, - struct pagevec *pvec) + struct folio_batch *fbatch) { - XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index); + XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index); int total_pages = 0; int i = 0; struct folio *folio; mapping_set_update(&xas, mapping); xas_for_each(&xas, folio, ULONG_MAX) { - if (i >= pagevec_count(pvec)) + if (i >= folio_batch_count(fbatch)) break; /* A swap/dax/shadow entry got inserted? Skip it. */ @@ -306,9 +305,9 @@ static void page_cache_delete_batch(struct address_space *mapping, * means our page has been removed, which shouldn't be * possible because we're holding the PageLock. */ - if (&folio->page != pvec->pages[i]) { + if (folio != fbatch->folios[i]) { VM_BUG_ON_FOLIO(folio->index > - pvec->pages[i]->index, folio); + fbatch->folios[i]->index, folio); continue; } @@ -316,12 +315,11 @@ static void page_cache_delete_batch(struct address_space *mapping, if (folio->index == xas.xa_index) folio->mapping = NULL; - /* Leave page->index set: truncation lookup relies on it */ + /* Leave folio->index set: truncation lookup relies on it */ /* - * Move to the next page in the vector if this is a regular - * page or the index is of the last sub-page of this compound - * page. + * Move to the next folio in the batch if this is a regular + * folio or the index is of the last sub-page of this folio. */ if (folio->index + folio_nr_pages(folio) - 1 == xas.xa_index) i++; @@ -332,29 +330,29 @@ static void page_cache_delete_batch(struct address_space *mapping, } void delete_from_page_cache_batch(struct address_space *mapping, - struct pagevec *pvec) + struct folio_batch *fbatch) { int i; - if (!pagevec_count(pvec)) + if (!folio_batch_count(fbatch)) return; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); - for (i = 0; i < pagevec_count(pvec); i++) { - struct folio *folio = page_folio(pvec->pages[i]); + for (i = 0; i < folio_batch_count(fbatch); i++) { + struct folio *folio = fbatch->folios[i]; trace_mm_filemap_delete_from_page_cache(folio); filemap_unaccount_folio(mapping, folio); } - page_cache_delete_batch(mapping, pvec); + page_cache_delete_batch(mapping, fbatch); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); - for (i = 0; i < pagevec_count(pvec); i++) - filemap_free_folio(mapping, page_folio(pvec->pages[i])); + for (i = 0; i < folio_batch_count(fbatch); i++) + filemap_free_folio(mapping, fbatch->folios[i]); } int filemap_check_errors(struct address_space *mapping) @@ -2052,8 +2050,8 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, * @mapping: The address_space to search. * @start: The starting page cache index. * @end: The final page index (inclusive). - * @pvec: Where the resulting entries are placed. - * @indices: The cache indices of the entries in @pvec. + * @fbatch: Where the resulting entries are placed. + * @indices: The cache indices of the entries in @fbatch. * * find_lock_entries() will return a batch of entries from @mapping. * Swap, shadow and DAX entries are included. Folios are returned @@ -2068,7 +2066,7 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start, * Return: The number of entries which were found. */ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct pagevec *pvec, pgoff_t *indices) + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices) { XA_STATE(xas, &mapping->i_pages, start); struct folio *folio; @@ -2088,8 +2086,8 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index), folio); } - indices[pvec->nr] = xas.xa_index; - if (!pagevec_add(pvec, &folio->page)) + indices[fbatch->nr] = xas.xa_index; + if (!folio_batch_add(fbatch, folio)) break; goto next; unlock: @@ -2106,7 +2104,7 @@ next: } rcu_read_unlock(); - return pagevec_count(pvec); + return folio_batch_count(fbatch); } /** diff --git a/mm/internal.h b/mm/internal.h index 07124e95e790..c52c05dc6b1f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -93,7 +93,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping, } unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, - pgoff_t end, struct pagevec *pvec, pgoff_t *indices); + pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices); void filemap_free_folio(struct address_space *mapping, struct folio *folio); diff --git a/mm/shmem.c b/mm/shmem.c index e909c163fb38..bbfa2d05e787 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -919,7 +919,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, pgoff_t end = (lend + 1) >> PAGE_SHIFT; unsigned int partial_start = lstart & (PAGE_SIZE - 1); unsigned int partial_end = (lend + 1) & (PAGE_SIZE - 1); - struct pagevec pvec; struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; @@ -932,12 +931,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (info->fallocend > start && info->fallocend <= end && !unfalloc) info->fallocend = start; - pagevec_init(&pvec); + folio_batch_init(&fbatch); index = start; while (index < end && find_lock_entries(mapping, index, end - 1, - &pvec, indices)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - struct folio *folio = (struct folio *)pvec.pages[i]; + &fbatch, indices)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct folio *folio = fbatch.folios[i]; index = indices[i]; @@ -954,8 +953,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, truncate_inode_folio(mapping, folio); folio_unlock(folio); } - pagevec_remove_exceptionals(&pvec); - pagevec_release(&pvec); + folio_batch_remove_exceptionals(&fbatch); + folio_batch_release(&fbatch); cond_resched(); index++; } @@ -988,7 +987,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (start >= end) return; - folio_batch_init(&fbatch); index = start; while (index < end) { cond_resched(); diff --git a/mm/truncate.c b/mm/truncate.c index 357af144df63..e7f5762c43d3 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -56,11 +56,11 @@ static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, /* * Unconditionally remove exceptional entries. Usually called from truncate - * path. Note that the pagevec may be altered by this function by removing + * path. Note that the folio_batch may be altered by this function by removing * exceptional entries similar to what pagevec_remove_exceptionals does. */ -static void truncate_exceptional_pvec_entries(struct address_space *mapping, - struct pagevec *pvec, pgoff_t *indices) +static void truncate_folio_batch_exceptionals(struct address_space *mapping, + struct folio_batch *fbatch, pgoff_t *indices) { int i, j; bool dax; @@ -69,11 +69,11 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, if (shmem_mapping(mapping)) return; - for (j = 0; j < pagevec_count(pvec); j++) - if (xa_is_value(pvec->pages[j])) + for (j = 0; j < folio_batch_count(fbatch); j++) + if (xa_is_value(fbatch->folios[j])) break; - if (j == pagevec_count(pvec)) + if (j == folio_batch_count(fbatch)) return; dax = dax_mapping(mapping); @@ -82,12 +82,12 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, xa_lock_irq(&mapping->i_pages); } - for (i = j; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; + for (i = j; i < folio_batch_count(fbatch); i++) { + struct folio *folio = fbatch->folios[i]; pgoff_t index = indices[i]; - if (!xa_is_value(page)) { - pvec->pages[j++] = page; + if (!xa_is_value(folio)) { + fbatch->folios[j++] = folio; continue; } @@ -96,7 +96,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, continue; } - __clear_shadow_entry(mapping, index, page); + __clear_shadow_entry(mapping, index, folio); } if (!dax) { @@ -105,14 +105,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } - pvec->nr = j; -} - -static void truncate_folio_batch_exceptionals(struct address_space *mapping, - struct folio_batch *fbatch, pgoff_t *indices) -{ - truncate_exceptional_pvec_entries(mapping, (struct pagevec *)fbatch, - indices); + fbatch->nr = j; } /* @@ -303,7 +296,6 @@ void truncate_inode_pages_range(struct address_space *mapping, pgoff_t end; /* exclusive */ unsigned int partial_start; /* inclusive */ unsigned int partial_end; /* exclusive */ - struct pagevec pvec; struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; @@ -333,18 +325,18 @@ void truncate_inode_pages_range(struct address_space *mapping, else end = (lend + 1) >> PAGE_SHIFT; - pagevec_init(&pvec); + folio_batch_init(&fbatch); index = start; while (index < end && find_lock_entries(mapping, index, end - 1, - &pvec, indices)) { - index = indices[pagevec_count(&pvec) - 1] + 1; - truncate_exceptional_pvec_entries(mapping, &pvec, indices); - for (i = 0; i < pagevec_count(&pvec); i++) - truncate_cleanup_folio(page_folio(pvec.pages[i])); - delete_from_page_cache_batch(mapping, &pvec); - for (i = 0; i < pagevec_count(&pvec); i++) - unlock_page(pvec.pages[i]); - pagevec_release(&pvec); + &fbatch, indices)) { + index = indices[folio_batch_count(&fbatch) - 1] + 1; + truncate_folio_batch_exceptionals(mapping, &fbatch, indices); + for (i = 0; i < folio_batch_count(&fbatch); i++) + truncate_cleanup_folio(fbatch.folios[i]); + delete_from_page_cache_batch(mapping, &fbatch); + for (i = 0; i < folio_batch_count(&fbatch); i++) + folio_unlock(fbatch.folios[i]); + folio_batch_release(&fbatch); cond_resched(); } @@ -387,7 +379,6 @@ void truncate_inode_pages_range(struct address_space *mapping, if (start >= end) goto out; - folio_batch_init(&fbatch); index = start; for ( ; ; ) { cond_resched(); @@ -489,16 +480,16 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_pagevec) { pgoff_t indices[PAGEVEC_SIZE]; - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t index = start; unsigned long ret; unsigned long count = 0; int i; - pagevec_init(&pvec); - while (find_lock_entries(mapping, index, end, &pvec, indices)) { - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; + folio_batch_init(&fbatch); + while (find_lock_entries(mapping, index, end, &fbatch, indices)) { + for (i = 0; i < folio_batch_count(&fbatch); i++) { + struct page *page = &fbatch.folios[i]->page; /* We rely upon deletion not changing page->index */ index = indices[i]; @@ -525,8 +516,8 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping, } count += ret; } - pagevec_remove_exceptionals(&pvec); - pagevec_release(&pvec); + folio_batch_remove_exceptionals(&fbatch); + folio_batch_release(&fbatch); cond_resched(); index++; } -- cgit v1.2.3-59-g8ed1b From 1613fac9aaf840af76faa747ea428a714af98dbd Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Dec 2021 14:28:49 -0500 Subject: mm: Remove pagevec_remove_exceptionals() All of its callers now call folio_batch_remove_exceptionals(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/pagevec.h | 6 +----- mm/swap.c | 26 +++++++++++++------------- mm/truncate.c | 2 +- 3 files changed, 15 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index c3fa616d7ae7..dda8d5868c81 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -27,7 +27,6 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); -void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, pgoff_t *start, pgoff_t end); @@ -146,8 +145,5 @@ static inline void folio_batch_release(struct folio_batch *fbatch) pagevec_release((struct pagevec *)fbatch); } -static inline void folio_batch_remove_exceptionals(struct folio_batch *fbatch) -{ - pagevec_remove_exceptionals((struct pagevec *)fbatch); -} +void folio_batch_remove_exceptionals(struct folio_batch *fbatch); #endif /* _LINUX_PAGEVEC_H */ diff --git a/mm/swap.c b/mm/swap.c index e8c9dc6d0377..74f6b311d7ee 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1077,24 +1077,24 @@ void __pagevec_lru_add(struct pagevec *pvec) } /** - * pagevec_remove_exceptionals - pagevec exceptionals pruning - * @pvec: The pagevec to prune + * folio_batch_remove_exceptionals() - Prune non-folios from a batch. + * @fbatch: The batch to prune * - * find_get_entries() fills both pages and XArray value entries (aka - * exceptional entries) into the pagevec. This function prunes all - * exceptionals from @pvec without leaving holes, so that it can be - * passed on to page-only pagevec operations. + * find_get_entries() fills a batch with both folios and shadow/swap/DAX + * entries. This function prunes all the non-folio entries from @fbatch + * without leaving holes, so that it can be passed on to folio-only batch + * operations. */ -void pagevec_remove_exceptionals(struct pagevec *pvec) +void folio_batch_remove_exceptionals(struct folio_batch *fbatch) { - int i, j; + unsigned int i, j; - for (i = 0, j = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - if (!xa_is_value(page)) - pvec->pages[j++] = page; + for (i = 0, j = 0; i < folio_batch_count(fbatch); i++) { + struct folio *folio = fbatch->folios[i]; + if (!xa_is_value(folio)) + fbatch->folios[j++] = folio; } - pvec->nr = j; + fbatch->nr = j; } /** diff --git a/mm/truncate.c b/mm/truncate.c index e7f5762c43d3..a1113b0abb30 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -57,7 +57,7 @@ static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, /* * Unconditionally remove exceptional entries. Usually called from truncate * path. Note that the folio_batch may be altered by this function by removing - * exceptional entries similar to what pagevec_remove_exceptionals does. + * exceptional entries similar to what folio_batch_remove_exceptionals() does. */ static void truncate_folio_batch_exceptionals(struct address_space *mapping, struct folio_batch *fbatch, pgoff_t *indices) -- cgit v1.2.3-59-g8ed1b From 25a8de7f8d970ffa7263bd9d32a08138cd949f17 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 27 Aug 2021 07:21:49 -0400 Subject: XArray: Add xas_advance() Add a new helper function to help iterate over multi-index entries. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: William Kucharski --- include/linux/xarray.h | 18 ++++++++++++++++++ lib/xarray.c | 6 +++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index a91e3d90df8a..d6d5da6ed735 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1580,6 +1580,24 @@ static inline void xas_set(struct xa_state *xas, unsigned long index) xas->xa_node = XAS_RESTART; } +/** + * xas_advance() - Skip over sibling entries. + * @xas: XArray operation state. + * @index: Index of last sibling entry. + * + * Move the operation state to refer to the last sibling entry. + * This is useful for loops that normally want to see sibling + * entries but sometimes want to skip them. Use xas_set() if you + * want to move to an index which is not part of this entry. + */ +static inline void xas_advance(struct xa_state *xas, unsigned long index) +{ + unsigned char shift = xas_is_node(xas) ? xas->xa_node->shift : 0; + + xas->xa_index = index; + xas->xa_offset = (index >> shift) & XA_CHUNK_MASK; +} + /** * xas_set_order() - Set up XArray operation state for a multislot entry. * @xas: XArray operation state. diff --git a/lib/xarray.c b/lib/xarray.c index f5d8f54907b4..6f47f6375808 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -157,7 +157,7 @@ static void xas_move_index(struct xa_state *xas, unsigned long offset) xas->xa_index += offset << shift; } -static void xas_advance(struct xa_state *xas) +static void xas_next_offset(struct xa_state *xas) { xas->xa_offset++; xas_move_index(xas, xas->xa_offset); @@ -1250,7 +1250,7 @@ void *xas_find(struct xa_state *xas, unsigned long max) xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; } - xas_advance(xas); + xas_next_offset(xas); while (xas->xa_node && (xas->xa_index <= max)) { if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { @@ -1268,7 +1268,7 @@ void *xas_find(struct xa_state *xas, unsigned long max) if (entry && !xa_is_sibling(entry)) return entry; - xas_advance(xas); + xas_next_offset(xas); } if (!xas->xa_node) -- cgit v1.2.3-59-g8ed1b From 6b24ca4a1a8d4ee3221d6d44ddbb99f542e4bda3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 27 Jun 2020 22:19:08 -0400 Subject: mm: Use multi-index entries in the page cache We currently store large folios as 2^N consecutive entries. While this consumes rather more memory than necessary, it also turns out to be buggy. A writeback operation which starts within a tail page of a dirty folio will not write back the folio as the xarray's dirty bit is only set on the head index. With multi-index entries, the dirty bit will be found no matter where in the folio the operation starts. This does end up simplifying the page cache slightly, although not as much as I had hoped. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: William Kucharski --- include/linux/pagemap.h | 10 -------- mm/filemap.c | 61 +++++++++++++++++++++++++++++++------------------ mm/huge_memory.c | 18 +++++++++++---- mm/khugepaged.c | 12 +++++++++- mm/migrate.c | 8 ------- mm/shmem.c | 19 +++++++-------- 6 files changed, 72 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 6e038811f4c8..704cb1b4b15d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1125,16 +1125,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; rac->_batch_count += thp_nr_pages(page); - - /* - * The page cache isn't using multi-index entries yet, - * so the xas cursor needs to be manually moved to the - * next index. This can be removed once the page cache - * is converted. - */ - if (PageHead(page)) - xas_set(&xas, rac->_index + rac->_batch_count); - if (i == array_sz) break; } diff --git a/mm/filemap.c b/mm/filemap.c index 9d3bae3e36c3..33077c264d79 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -135,7 +135,6 @@ static void page_cache_delete(struct address_space *mapping, } VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - VM_BUG_ON_FOLIO(nr != 1 && shadow, folio); xas_store(&xas, shadow); xas_init_marks(&xas); @@ -286,7 +285,7 @@ static void page_cache_delete_batch(struct address_space *mapping, struct folio_batch *fbatch) { XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index); - int total_pages = 0; + long total_pages = 0; int i = 0; struct folio *folio; @@ -313,18 +312,12 @@ static void page_cache_delete_batch(struct address_space *mapping, WARN_ON_ONCE(!folio_test_locked(folio)); - if (folio->index == xas.xa_index) - folio->mapping = NULL; + folio->mapping = NULL; /* Leave folio->index set: truncation lookup relies on it */ - /* - * Move to the next folio in the batch if this is a regular - * folio or the index is of the last sub-page of this folio. - */ - if (folio->index + folio_nr_pages(folio) - 1 == xas.xa_index) - i++; + i++; xas_store(&xas, NULL); - total_pages++; + total_pages += folio_nr_pages(folio); } mapping->nrpages -= total_pages; } @@ -2089,24 +2082,27 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, indices[fbatch->nr] = xas.xa_index; if (!folio_batch_add(fbatch, folio)) break; - goto next; + continue; unlock: folio_unlock(folio); put: folio_put(folio); -next: - if (!xa_is_value(folio) && folio_test_large(folio)) { - xas_set(&xas, folio->index + folio_nr_pages(folio)); - /* Did we wrap on 32-bit? */ - if (!xas.xa_index) - break; - } } rcu_read_unlock(); return folio_batch_count(fbatch); } +static inline +bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) +{ + if (!folio_test_large(folio) || folio_test_hugetlb(folio)) + return false; + if (index >= max) + return false; + return index < folio->index + folio_nr_pages(folio) - 1; +} + /** * find_get_pages_range - gang pagecache lookup * @mapping: The address_space to search @@ -2145,11 +2141,17 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, if (xa_is_value(folio)) continue; +again: pages[ret] = folio_file_page(folio, xas.xa_index); if (++ret == nr_pages) { *start = xas.xa_index + 1; goto out; } + if (folio_more_pages(folio, xas.xa_index, end)) { + xas.xa_index++; + folio_ref_inc(folio); + goto again; + } } /* @@ -2207,9 +2209,15 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, if (unlikely(folio != xas_reload(&xas))) goto put_page; - pages[ret] = &folio->page; +again: + pages[ret] = folio_file_page(folio, xas.xa_index); if (++ret == nr_pages) break; + if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) { + xas.xa_index++; + folio_ref_inc(folio); + goto again; + } continue; put_page: folio_put(folio); @@ -2334,8 +2342,7 @@ static void filemap_get_read_batch(struct address_space *mapping, break; if (folio_test_readahead(folio)) break; - xas.xa_index = folio->index + folio_nr_pages(folio) - 1; - xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK; + xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1); continue; put_folio: folio_put(folio); @@ -3284,6 +3291,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); do { +again: page = folio_file_page(folio, xas.xa_index); if (PageHWPoison(page)) goto unlock; @@ -3305,9 +3313,18 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, do_set_pte(vmf, page, addr); /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, addr, vmf->pte); + if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { + xas.xa_index++; + folio_ref_inc(folio); + goto again; + } folio_unlock(folio); continue; unlock: + if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { + xas.xa_index++; + goto again; + } folio_unlock(folio); folio_put(folio); } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e5483347291c..f58524394dc1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2614,6 +2614,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) { struct page *head = compound_head(page); struct deferred_split *ds_queue = get_deferred_split_queue(head); + XA_STATE(xas, &head->mapping->i_pages, head->index); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; int extra_pins, ret; @@ -2652,6 +2653,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) goto out; } + xas_split_alloc(&xas, head, compound_order(head), + mapping_gfp_mask(mapping) & GFP_RECLAIM_MASK); + if (xas_error(&xas)) { + ret = xas_error(&xas); + goto out; + } + anon_vma = NULL; i_mmap_lock_read(mapping); @@ -2681,13 +2689,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) /* block interrupt reentry in xa_lock and spinlock */ local_irq_disable(); if (mapping) { - XA_STATE(xas, &mapping->i_pages, page_index(head)); - /* * Check if the head page is present in page cache. * We assume all tail are present too, if head is there. */ - xa_lock(&mapping->i_pages); + xas_lock(&xas); + xas_reset(&xas); if (xas_load(&xas) != head) goto fail; } @@ -2703,6 +2710,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) if (mapping) { int nr = thp_nr_pages(head); + xas_split(&xas, head, thp_order(head)); if (PageSwapBacked(head)) { __mod_lruvec_page_state(head, NR_SHMEM_THPS, -nr); @@ -2719,7 +2727,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) spin_unlock(&ds_queue->split_queue_lock); fail: if (mapping) - xa_unlock(&mapping->i_pages); + xas_unlock(&xas); local_irq_enable(); remap_page(head, thp_nr_pages(head)); ret = -EBUSY; @@ -2733,6 +2741,8 @@ out_unlock: if (mapping) i_mmap_unlock_read(mapping); out: + /* Free any memory we didn't use */ + xas_nomem(&xas, 0); count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); return ret; } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index e99101162f1a..2e1911cc3466 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1667,7 +1667,10 @@ static void collapse_file(struct mm_struct *mm, } count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC); - /* This will be less messy when we use multi-index entries */ + /* + * Ensure we have slots for all the pages in the range. This is + * almost certainly a no-op because most of the pages must be present + */ do { xas_lock_irq(&xas); xas_create_range(&xas); @@ -1892,6 +1895,9 @@ out_unlock: __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); } + /* Join all the small entries into a single multi-index entry */ + xas_set_order(&xas, start, HPAGE_PMD_ORDER); + xas_store(&xas, new_page); xa_locked: xas_unlock_irq(&xas); xa_unlocked: @@ -2013,6 +2019,10 @@ static void khugepaged_scan_file(struct mm_struct *mm, continue; } + /* + * XXX: khugepaged should compact smaller compound pages + * into a PMD sized page + */ if (PageTransCompound(page)) { result = SCAN_PAGE_COMPOUND; break; diff --git a/mm/migrate.c b/mm/migrate.c index 311638177536..7079e6b7dbe7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -433,14 +433,6 @@ int folio_migrate_mapping(struct address_space *mapping, } xas_store(&xas, newfolio); - if (nr > 1) { - int i; - - for (i = 1; i < nr; i++) { - xas_next(&xas); - xas_store(&xas, newfolio); - } - } /* * Drop cache reference from old page by unfreezing diff --git a/mm/shmem.c b/mm/shmem.c index e4c9e5c7081f..28d627444a24 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -694,7 +694,6 @@ static int shmem_add_to_page_cache(struct page *page, struct mm_struct *charge_mm) { XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); - unsigned long i = 0; unsigned long nr = compound_nr(page); int error; @@ -721,20 +720,18 @@ static int shmem_add_to_page_cache(struct page *page, cgroup_throttle_swaprate(page, gfp); do { - void *entry; xas_lock_irq(&xas); - entry = xas_find_conflict(&xas); - if (entry != expected) + if (expected != xas_find_conflict(&xas)) { + xas_set_err(&xas, -EEXIST); + goto unlock; + } + if (expected && xas_find_conflict(&xas)) { xas_set_err(&xas, -EEXIST); - xas_create_range(&xas); - if (xas_error(&xas)) goto unlock; -next: - xas_store(&xas, page); - if (++i < nr) { - xas_next(&xas); - goto next; } + xas_store(&xas, page); + if (xas_error(&xas)) + goto unlock; if (PageTransHuge(page)) { count_vm_event(THP_FILE_ALLOC); __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr); -- cgit v1.2.3-59-g8ed1b