From 9096bbe951ddd3f9dc813e73b5bde6d5715d1cdb Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 28 Apr 2022 23:15:58 -0700 Subject: mm: shmem: make shmem_init return void The return value of shmem_init is never used. So we can make it return void now. [akpm@linux-foundation.org: remove `return;' from void-returning function, per Muchun Song] Link: https://lkml.kernel.org/r/20220328112707.22217-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Muchun Song Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/shmem.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 4b2fea33158e..e504d734177b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3888,7 +3888,7 @@ static struct file_system_type shmem_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -int __init shmem_init(void) +void __init shmem_init(void) { int error; @@ -3913,14 +3913,13 @@ int __init shmem_init(void) else shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */ #endif - return 0; + return; out1: unregister_filesystem(&shmem_fs_type); out2: shmem_destroy_inodecache(); shm_mnt = ERR_PTR(error); - return error; } #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) @@ -3998,14 +3997,12 @@ static struct file_system_type shmem_fs_type = { .fs_flags = FS_USERNS_MOUNT, }; -int __init shmem_init(void) +void __init shmem_init(void) { BUG_ON(register_filesystem(&shmem_fs_type) != 0); shm_mnt = kern_mount(&shmem_fs_type); BUG_ON(IS_ERR(shm_mnt)); - - return 0; } int shmem_unuse(unsigned int type) -- cgit v1.2.3-59-g8ed1b From 014bb1de4fc17d54907d54418126a9a9736f4aff Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 9 May 2022 18:20:47 -0700 Subject: mm: create new mm/swap.h header file Patch series "MM changes to improve swap-over-NFS support". Assorted improvements for swap-via-filesystem. This is a resend of these patches, rebased on current HEAD. The only substantial changes is that swap_dirty_folio has replaced swap_set_page_dirty. Currently swap-via-fs (SWP_FS_OPS) doesn't work for any filesystem. It has previously worked for NFS but that broke a few releases back. This series changes to use a new ->swap_rw rather than ->readpage and ->direct_IO. It also makes other improvements. There is a companion series already in linux-next which fixes various issues with NFS. Once both series land, a final patch is needed which changes NFS over to use ->swap_rw. This patch (of 10): Many functions declared in include/linux/swap.h are only used within mm/ Create a new "mm/swap.h" and move some of these declarations there. Remove the redundant 'extern' from the function declarations. 
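As a minimal sketch of the resulting include pattern (the file and function names below are hypothetical, not part of the series), a translation unit inside mm/ now pulls in the private header alongside the public one:

    /* mm/example.c - hypothetical user of the new mm-private header */
    #include <linux/swap.h>    /* public swap API: swp_entry_t, nr_swap_pages, ... */
    #include "swap.h"          /* mm-internal declarations moved by this patch */

    static int example_swap_read(struct page *page)
    {
            /* swap_readpage() is now declared only in mm/swap.h */
            return swap_readpage(page, false);
    }

Code outside mm/ keeps including <linux/swap.h> and never sees the moved declarations.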
[akpm@linux-foundation.org: mm/memory-failure.c needs mm/swap.h] Link: https://lkml.kernel.org/r/164859751830.29473.5309689752169286816.stgit@noble.brown Link: https://lkml.kernel.org/r/164859778120.29473.11725907882296224053.stgit@noble.brown Signed-off-by: NeilBrown Reviewed-by: Christoph Hellwig Tested-by: David Howells Tested-by: Geert Uytterhoeven Cc: Trond Myklebust Cc: Hugh Dickins Cc: Mel Gorman Cc: Miaohe Lin Signed-off-by: Andrew Morton --- include/linux/swap.h | 121 ---------------------------------------------- mm/huge_memory.c | 1 + mm/madvise.c | 1 + mm/memcontrol.c | 1 + mm/memory-failure.c | 1 + mm/memory.c | 1 + mm/mincore.c | 1 + mm/page_alloc.c | 1 + mm/page_io.c | 1 + mm/shmem.c | 1 + mm/swap.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++ mm/swap_state.c | 1 + mm/swapfile.c | 1 + mm/util.c | 1 + mm/vmscan.c | 1 + mm/zswap.c | 2 + 16 files changed, 148 insertions(+), 121 deletions(-) create mode 100644 mm/swap.h (limited to 'mm/shmem.c') diff --git a/include/linux/swap.h b/include/linux/swap.h index e6d70a4156e8..def67112dce4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -427,62 +427,19 @@ extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP -#include /* for bio_end_io_t */ - -/* linux/mm/page_io.c */ -extern int swap_readpage(struct page *page, bool do_poll); -extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern void end_swap_bio_write(struct bio *bio); -extern int __swap_writepage(struct page *page, struct writeback_control *wbc, - bio_end_io_t end_write_func); bool swap_dirty_folio(struct address_space *mapping, struct folio *folio); - int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); -/* linux/mm/swap_state.c */ -/* One swap address space for each 64M swap space */ -#define SWAP_ADDRESS_SPACE_SHIFT 14 -#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) -extern struct address_space *swapper_spaces[]; -#define swap_address_space(entry) \ - (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ - >> SWAP_ADDRESS_SPACE_SHIFT]) static inline unsigned long total_swapcache_pages(void) { return global_node_page_state(NR_SWAPCACHE); } -extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *page); -extern void *get_shadow_from_swap_cache(swp_entry_t entry); -extern int add_to_swap_cache(struct page *page, swp_entry_t entry, - gfp_t gfp, void **shadowp); -extern void __delete_from_swap_cache(struct page *page, - swp_entry_t entry, void *shadow); -extern void delete_from_swap_cache(struct page *); -extern void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end); -extern void free_swap_cache(struct page *); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); -extern struct page *lookup_swap_cache(swp_entry_t entry, - struct vm_area_struct *vma, - unsigned long addr); -struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); -extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, - struct vm_area_struct *vma, unsigned long addr, - bool do_poll); -extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, - struct vm_area_struct *vma, unsigned long addr, - bool *new_page_allocated); -extern struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, - struct vm_fault *vmf); -extern struct page 
*swapin_readahead(swp_entry_t entry, gfp_t flag, - struct vm_fault *vmf); - /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; @@ -535,12 +492,6 @@ static inline void put_swap_device(struct swap_info_struct *si) } #else /* CONFIG_SWAP */ - -static inline int swap_readpage(struct page *page, bool do_poll) -{ - return 0; -} - static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return NULL; @@ -555,11 +506,6 @@ static inline void put_swap_device(struct swap_info_struct *si) { } -static inline struct address_space *swap_address_space(swp_entry_t entry) -{ - return NULL; -} - #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL @@ -574,14 +520,6 @@ static inline struct address_space *swap_address_space(swp_entry_t entry) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); -static inline void free_swap_cache(struct page *page) -{ -} - -static inline void show_swap_cache_info(void) -{ -} - /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ #define free_swap_and_cache(e) is_pfn_swap_entry(e) @@ -607,65 +545,6 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp) { } -static inline struct page *swap_cluster_readahead(swp_entry_t entry, - gfp_t gfp_mask, struct vm_fault *vmf) -{ - return NULL; -} - -static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, - struct vm_fault *vmf) -{ - return NULL; -} - -static inline int swap_writepage(struct page *p, struct writeback_control *wbc) -{ - return 0; -} - -static inline struct page *lookup_swap_cache(swp_entry_t swp, - struct vm_area_struct *vma, - unsigned long addr) -{ - return NULL; -} - -static inline -struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) -{ - return find_get_page(mapping, index); -} - -static inline int add_to_swap(struct page *page) -{ - return 0; -} - -static inline void *get_shadow_from_swap_cache(swp_entry_t entry) -{ - return NULL; -} - -static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, - gfp_t gfp_mask, void **shadowp) -{ - return -1; -} - -static inline void __delete_from_swap_cache(struct page *page, - swp_entry_t entry, void *shadow) -{ -} - -static inline void delete_from_swap_cache(struct page *page) -{ -} - -static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, - unsigned long end) -{ -} static inline int page_swapcount(struct page *page) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a2f44d8d3d47..86eae6834db7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -39,6 +39,7 @@ #include #include #include "internal.h" +#include "swap.h" #define CREATE_TRACE_POINTS #include diff --git a/mm/madvise.c b/mm/madvise.c index c965fac7b13a..b19daedde0ea 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -35,6 +35,7 @@ #include #include "internal.h" +#include "swap.h" struct madvise_walk_private { struct mmu_gather *tlb; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d180ef985b17..235c85e17c2d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -67,6 +67,7 @@ #include #include #include "slab.h" +#include "swap.h" #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9711c0eb70b0..1d117190c350 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -59,6 +59,7 @@ #include #include #include +#include "swap.h" #include "internal.h" #include "ras/ras_event.h" diff --git a/mm/memory.c b/mm/memory.c index bca60092b4d5..6e9428ae6d66 100644 --- 
a/mm/memory.c +++ b/mm/memory.c @@ -86,6 +86,7 @@ #include "pgalloc-track.h" #include "internal.h" +#include "swap.h" #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. diff --git a/mm/mincore.c b/mm/mincore.c index 9122676b54d6..f4f627325e12 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -20,6 +20,7 @@ #include #include +#include "swap.h" static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f01c71e41bcf..e85a0dc22761 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -81,6 +81,7 @@ #include "internal.h" #include "shuffle.h" #include "page_reporting.h" +#include "swap.h" /* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */ typedef int __bitwise fpi_t; diff --git a/mm/page_io.c b/mm/page_io.c index 89fbf3cae30f..4b359e59bfdc 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -26,6 +26,7 @@ #include #include #include +#include "swap.h" void end_swap_bio_write(struct bio *bio) { diff --git a/mm/shmem.c b/mm/shmem.c index e504d734177b..913c0b039333 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -38,6 +38,7 @@ #include #include #include +#include "swap.h" static struct vfsmount *shm_mnt; diff --git a/mm/swap.h b/mm/swap.h new file mode 100644 index 000000000000..f8265bf0ce00 --- /dev/null +++ b/mm/swap.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MM_SWAP_H +#define _MM_SWAP_H + +#ifdef CONFIG_SWAP +#include /* for bio_end_io_t */ + +/* linux/mm/page_io.c */ +int swap_readpage(struct page *page, bool do_poll); +int swap_writepage(struct page *page, struct writeback_control *wbc); +void end_swap_bio_write(struct bio *bio); +int __swap_writepage(struct page *page, struct writeback_control *wbc, + bio_end_io_t end_write_func); + +/* linux/mm/swap_state.c */ +/* One swap address space for each 64M swap space */ +#define SWAP_ADDRESS_SPACE_SHIFT 14 +#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) +extern struct address_space *swapper_spaces[]; +#define swap_address_space(entry) \ + (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ + >> SWAP_ADDRESS_SPACE_SHIFT]) + +void show_swap_cache_info(void); +int add_to_swap(struct page *page); +void *get_shadow_from_swap_cache(swp_entry_t entry); +int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp, void **shadowp); +void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow); +void delete_from_swap_cache(struct page *page); +void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end); +void free_swap_cache(struct page *page); +struct page *lookup_swap_cache(swp_entry_t entry, + struct vm_area_struct *vma, + unsigned long addr); +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); + +struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, + unsigned long addr, + bool do_poll); +struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, + struct vm_area_struct *vma, + unsigned long addr, + bool *new_page_allocated); +struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf); +struct page *swapin_readahead(swp_entry_t entry, gfp_t flag, + struct vm_fault *vmf); + +#else /* CONFIG_SWAP */ +static inline int swap_readpage(struct page *page, bool do_poll) +{ + return 0; +} + 
+static inline struct address_space *swap_address_space(swp_entry_t entry) +{ + return NULL; +} + +static inline void free_swap_cache(struct page *page) +{ +} + +static inline void show_swap_cache_info(void) +{ +} + +static inline struct page *swap_cluster_readahead(swp_entry_t entry, + gfp_t gfp_mask, struct vm_fault *vmf) +{ + return NULL; +} + +static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, + struct vm_fault *vmf) +{ + return NULL; +} + +static inline int swap_writepage(struct page *p, struct writeback_control *wbc) +{ + return 0; +} + +static inline struct page *lookup_swap_cache(swp_entry_t swp, + struct vm_area_struct *vma, + unsigned long addr) +{ + return NULL; +} + +static inline +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) +{ + return find_get_page(mapping, index); +} + +static inline int add_to_swap(struct page *page) +{ + return 0; +} + +static inline void *get_shadow_from_swap_cache(swp_entry_t entry) +{ + return NULL; +} + +static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp_mask, void **shadowp) +{ + return -1; +} + +static inline void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow) +{ +} + +static inline void delete_from_swap_cache(struct page *page) +{ +} + +static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end) +{ +} + +#endif /* CONFIG_SWAP */ +#endif /* _MM_SWAP_H */ diff --git a/mm/swap_state.c b/mm/swap_state.c index 013856004825..5437dd317cf3 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -23,6 +23,7 @@ #include #include #include "internal.h" +#include "swap.h" /* * swapper_space is a fiction, retained to simplify the path through diff --git a/mm/swapfile.c b/mm/swapfile.c index 7279b2d2d71d..f08bf675a613 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -44,6 +44,7 @@ #include #include #include +#include "swap.h" static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); diff --git a/mm/util.c b/mm/util.c index 3492a9e81aa3..f27c796239b1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -27,6 +27,7 @@ #include #include "internal.h" +#include "swap.h" /** * kfree_const - conditionally free memory diff --git a/mm/vmscan.c b/mm/vmscan.c index 6e255e2867d2..b1b91d5b9f49 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -59,6 +59,7 @@ #include #include "internal.h" +#include "swap.h" #define CREATE_TRACE_POINTS #include diff --git a/mm/zswap.c b/mm/zswap.c index 3efd8cae315e..2c5db4cbedea 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -36,6 +36,8 @@ #include #include +#include "swap.h" + /********************************* * statistics **********************************/ -- cgit v1.2.3-59-g8ed1b From 8ee79edff6d3b43b2d0c1e41f92b32e128988b22 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 12 May 2022 20:22:52 -0700 Subject: mm/shmem: take care of UFFDIO_COPY_MODE_WP Pass wp_copy into shmem_mfill_atomic_pte() through the stack, then apply the UFFD_WP bit properly when the UFFDIO_COPY on shmem is with UFFDIO_COPY_MODE_WP. wp_copy lands mfill_atomic_install_pte() finally. Note: we must do pte_wrprotect() if !writable in mfill_atomic_install_pte(), as mk_pte() could return a writable pte (e.g., when VM_SHARED on a shmem file). Link: https://lkml.kernel.org/r/20220405014841.14185-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jerome Glisse Cc: "Kirill A . 
Shutemov" Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 4 ++-- mm/shmem.c | 4 ++-- mm/userfaultfd.c | 23 ++++++++++++++++++----- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'mm/shmem.c') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 3e915cc550bc..a68f982f22d1 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -145,11 +145,11 @@ extern int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, - bool zeropage, + bool zeropage, bool wp_copy, struct page **pagep); #else /* !CONFIG_SHMEM */ #define shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \ - src_addr, zeropage, pagep) ({ BUG(); 0; }) + src_addr, zeropage, wp_copy, pagep) ({ BUG(); 0; }) #endif /* CONFIG_SHMEM */ #endif /* CONFIG_USERFAULTFD */ diff --git a/mm/shmem.c b/mm/shmem.c index 913c0b039333..4a4a2e628289 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2319,7 +2319,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma, unsigned long dst_addr, unsigned long src_addr, - bool zeropage, + bool zeropage, bool wp_copy, struct page **pagep) { struct inode *inode = file_inode(dst_vma->vm_file); @@ -2392,7 +2392,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, goto out_release; ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, - page, true, false); + page, true, wp_copy); if (ret) goto out_delete_from_cache; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 9cdaed939203..2b6ec3352147 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -78,10 +78,19 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, * Always mark a PTE as write-protected when needed, regardless of * VM_WRITE, which the user might change. */ - if (wp_copy) + if (wp_copy) { _dst_pte = pte_mkuffd_wp(_dst_pte); - else if (writable) + writable = false; + } + + if (writable) _dst_pte = pte_mkwrite(_dst_pte); + else + /* + * We need this to make sure write bit removed; as mk_pte() + * could return a pte with write bit set. + */ + _dst_pte = pte_wrprotect(_dst_pte); dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl); @@ -96,7 +105,12 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, } ret = -EEXIST; - if (!pte_none(*dst_pte)) + /* + * We allow to overwrite a pte marker: consider when both MISSING|WP + * registered, we firstly wr-protect a none pte which has no page cache + * page backing it, then access the page. + */ + if (!pte_none_mostly(*dst_pte)) goto out_unlock; if (page_in_cache) { @@ -480,11 +494,10 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, err = mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, dst_addr); } else { - VM_WARN_ON_ONCE(wp_copy); err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, src_addr, mode != MCOPY_ATOMIC_NORMAL, - page); + wp_copy, page); } return err; -- cgit v1.2.3-59-g8ed1b From dfe98499ef288ac730662a70110bcadaa67c0ee0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:01 -0700 Subject: shmem: convert shmem_alloc_hugepage() to use vma_alloc_folio() Patch series "Folio patches for 5.19", v2. This patch (of 26): For now, return the head page of the folio, but remove use of the old alloc_pages_vma() API. 
Link: https://lkml.kernel.org/r/20220504182857.4013401-1-willy@infradead.org Link: https://lkml.kernel.org/r/20220504182857.4013401-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Zi Yan Signed-off-by: Andrew Morton --- mm/shmem.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 4a4a2e628289..7994421588a3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1528,7 +1528,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, struct vm_area_struct pvma; struct address_space *mapping = info->vfs_inode.i_mapping; pgoff_t hindex; - struct page *page; + struct folio *folio; hindex = round_down(index, HPAGE_PMD_NR); if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, @@ -1536,13 +1536,11 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, return NULL; shmem_pseudo_vma_init(&pvma, info, hindex); - page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, true); + folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, &pvma, 0, true); shmem_pseudo_vma_destroy(&pvma); - if (page) - prep_transhuge_page(page); - else + if (!folio) count_vm_event(THP_FILE_FALLBACK); - return page; + return &folio->page; } static struct page *shmem_alloc_page(gfp_t gfp, -- cgit v1.2.3-59-g8ed1b From e2e3fdc7d4afdb8e7ba981eba7827993f2d390a8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:02 -0700 Subject: swap: turn get_swap_page() into folio_alloc_swap() This removes an assumption that a large folio is HPAGE_PMD_NR pages in size. Link: https://lkml.kernel.org/r/20220504182857.4013401-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/swap.h | 13 +++++++------ mm/memcontrol.c | 16 ++++++++-------- mm/shmem.c | 3 ++- mm/swap_slots.c | 14 +++++++------- mm/swap_state.c | 3 ++- mm/swapfile.c | 17 +++++++++-------- 6 files changed, 35 insertions(+), 31 deletions(-) (limited to 'mm/shmem.c') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5553189d0215..252c23a6667f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -470,7 +470,7 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -extern swp_entry_t get_swap_page(struct page *page); +swp_entry_t folio_alloc_swap(struct folio *folio); extern void put_swap_page(struct page *page, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); @@ -583,7 +583,7 @@ static inline int try_to_free_swap(struct page *page) return 0; } -static inline swp_entry_t get_swap_page(struct page *page) +static inline swp_entry_t folio_alloc_swap(struct folio *folio) { swp_entry_t entry; entry.val = 0; @@ -643,12 +643,13 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask) #ifdef CONFIG_MEMCG_SWAP void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); -extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); -static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) +int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); +static inline int mem_cgroup_try_charge_swap(struct folio *folio, + swp_entry_t entry) { if (mem_cgroup_disabled()) return 0; - return __mem_cgroup_try_charge_swap(page, entry); + return __mem_cgroup_try_charge_swap(folio, entry); } extern void 
__mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); @@ -666,7 +667,7 @@ static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) { } -static inline int mem_cgroup_try_charge_swap(struct page *page, +static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6e74ca98c862..e1b5823ac060 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7162,17 +7162,17 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) } /** - * __mem_cgroup_try_charge_swap - try charging swap space for a page - * @page: page being added to swap + * __mem_cgroup_try_charge_swap - try charging swap space for a folio + * @folio: folio being added to swap * @entry: swap entry to charge * - * Try to charge @page's memcg for the swap space at @entry. + * Try to charge @folio's memcg for the swap space at @entry. * * Returns 0 on success, -ENOMEM on failure. */ -int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) +int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { - unsigned int nr_pages = thp_nr_pages(page); + unsigned int nr_pages = folio_nr_pages(folio); struct page_counter *counter; struct mem_cgroup *memcg; unsigned short oldid; @@ -7180,9 +7180,9 @@ int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return 0; - memcg = page_memcg(page); + memcg = folio_memcg(folio); - VM_WARN_ON_ONCE_PAGE(!memcg, page); + VM_WARN_ON_ONCE_FOLIO(!memcg, folio); if (!memcg) return 0; @@ -7205,7 +7205,7 @@ int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) if (nr_pages > 1) mem_cgroup_id_get_many(memcg, nr_pages - 1); oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages); - VM_BUG_ON_PAGE(oldid, page); + VM_BUG_ON_FOLIO(oldid, folio); mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); return 0; diff --git a/mm/shmem.c b/mm/shmem.c index 7994421588a3..7cfe76347f63 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1313,6 +1313,7 @@ int shmem_unuse(unsigned int type) */ static int shmem_writepage(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); struct shmem_inode_info *info; struct address_space *mapping; struct inode *inode; @@ -1386,7 +1387,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) SetPageUptodate(page); } - swap = get_swap_page(page); + swap = folio_alloc_swap(folio); if (!swap.val) goto redirty; diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 2b5531840583..0218ec1cd24c 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -117,7 +117,7 @@ static int alloc_swap_slot_cache(unsigned int cpu) /* * Do allocation outside swap_slots_cache_mutex - * as kvzalloc could trigger reclaim and get_swap_page, + * as kvzalloc could trigger reclaim and folio_alloc_swap, * which can lock swap_slots_cache_mutex. 
*/ slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t), @@ -213,7 +213,7 @@ static void __drain_swap_slots_cache(unsigned int type) * this function can be invoked in the cpu * hot plug path: * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback - * -> memory allocation -> direct reclaim -> get_swap_page + * -> memory allocation -> direct reclaim -> folio_alloc_swap * -> drain_swap_slots_cache * * Hence the loop over current online cpu below could miss cpu that @@ -301,16 +301,16 @@ direct_free: return 0; } -swp_entry_t get_swap_page(struct page *page) +swp_entry_t folio_alloc_swap(struct folio *folio) { swp_entry_t entry; struct swap_slots_cache *cache; entry.val = 0; - if (PageTransHuge(page)) { + if (folio_test_large(folio)) { if (IS_ENABLED(CONFIG_THP_SWAP)) - get_swap_pages(1, &entry, HPAGE_PMD_NR); + get_swap_pages(1, &entry, folio_nr_pages(folio)); goto out; } @@ -344,8 +344,8 @@ repeat: get_swap_pages(1, &entry, 1); out: - if (mem_cgroup_try_charge_swap(page, entry)) { - put_swap_page(page, entry); + if (mem_cgroup_try_charge_swap(folio, entry)) { + put_swap_page(&folio->page, entry); entry.val = 0; } return entry; diff --git a/mm/swap_state.c b/mm/swap_state.c index 27c4e28f795f..2dc0d02f8d79 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -184,13 +184,14 @@ void __delete_from_swap_cache(struct page *page, */ int add_to_swap(struct page *page) { + struct folio *folio = page_folio(page); swp_entry_t entry; int err; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageUptodate(page), page); - entry = get_swap_page(page); + entry = folio_alloc_swap(folio); if (!entry.val) return 0; diff --git a/mm/swapfile.c b/mm/swapfile.c index 9398e915b36b..bfa41bf5e190 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -77,9 +77,9 @@ static PLIST_HEAD(swap_active_head); /* * all available (active, not full) swap_info_structs * protected with swap_avail_lock, ordered by priority. - * This is used by get_swap_page() instead of swap_active_head + * This is used by folio_alloc_swap() instead of swap_active_head * because swap_active_head includes all swap_info_structs, - * but get_swap_page() doesn't need to look at full ones. + * but folio_alloc_swap() doesn't need to look at full ones. * This uses its own lock instead of swap_lock because when a * swap_info_struct changes between not-full/full, it needs to * add/remove itself to/from this list, but the swap_info_struct->lock @@ -2109,11 +2109,12 @@ retry: * Under global memory pressure, swap entries can be reinserted back * into process space after the mmlist loop above passes over them. * - * Limit the number of retries? No: when mmget_not_zero() above fails, - * that mm is likely to be freeing swap from exit_mmap(), which proceeds - * at its own independent pace; and even shmem_writepage() could have - * been preempted after get_swap_page(), temporarily hiding that swap. - * It's easy and robust (though cpu-intensive) just to keep retrying. + * Limit the number of retries? No: when mmget_not_zero() + * above fails, that mm is likely to be freeing swap from + * exit_mmap(), which proceeds at its own independent pace; + * and even shmem_writepage() could have been preempted after + * folio_alloc_swap(), temporarily hiding that swap. It's easy + * and robust (though cpu-intensive) just to keep retrying. */ if (READ_ONCE(si->inuse_pages)) { if (!signal_pending(current)) @@ -2327,7 +2328,7 @@ static void _enable_swap_info(struct swap_info_struct *p) * which on removal of any swap_info_struct with an auto-assigned * (i.e. 
negative) priority increments the auto-assigned priority * of any lower-priority swap_info_structs. - * swap_avail_head needs to be priority ordered for get_swap_page(), + * swap_avail_head needs to be priority ordered for folio_alloc_swap(), * which allocates swap pages from the highest available priority * swap_info_struct. */ -- cgit v1.2.3-59-g8ed1b From 0562457186752b6a65eeca9f5ed0fb9b1e2572e3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:03 -0700 Subject: mm/shmem: use a folio in shmem_unused_huge_shrink When calling split_huge_page() we usually have to find the precise page, but that's not necessary here because we only need to unlock and put the folio afterwards. Saves 231 bytes of text (20% of this function). Link: https://lkml.kernel.org/r/20220504182857.4013401-17-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 7cfe76347f63..6cba5fd9b694 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -554,7 +554,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, LIST_HEAD(to_remove); struct inode *inode; struct shmem_inode_info *info; - struct page *page; + struct folio *folio; unsigned long batch = sc ? sc->nr_to_scan : 128; int split = 0; @@ -598,6 +598,7 @@ next: list_for_each_safe(pos, next, &list) { int ret; + pgoff_t index; info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; @@ -605,14 +606,14 @@ next: if (nr_to_split && split >= nr_to_split) goto move_back; - page = find_get_page(inode->i_mapping, - (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); - if (!page) + index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT; + folio = filemap_get_folio(inode->i_mapping, index); + if (!folio) goto drop; /* No huge page at the end of the file: nothing to split */ - if (!PageTransHuge(page)) { - put_page(page); + if (!folio_test_large(folio)) { + folio_put(folio); goto drop; } @@ -623,14 +624,14 @@ next: * Waiting for the lock may lead to deadlock in the * reclaim path. */ - if (!trylock_page(page)) { - put_page(page); + if (!folio_trylock(folio)) { + folio_put(folio); goto move_back; } - ret = split_huge_page(page); - unlock_page(page); - put_page(page); + ret = split_huge_page(&folio->page); + folio_unlock(folio); + folio_put(folio); /* If split failed move the inode on the list back to shrinklist */ if (ret) -- cgit v1.2.3-59-g8ed1b From b7dd44a12cf26603712e60f1e1449c87d5f8cb79 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:04 -0700 Subject: mm/shmem: convert shmem_add_to_page_cache to take a folio Shrinks shmem_add_to_page_cache() by 16 bytes. All the callers grow, but this is temporary as they will all be converted to folios soon. 
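Until those callers are converted they bridge with page_folio(), as the hunks below show; condensed, the caller-side pattern looks like this (taken from the diff, no new API):

    struct folio *folio;

    /* page_folio() maps a page to its containing folio; for the head and
     * order-0 pages used here it is a change of type, not a copy. */
    folio = page_folio(page);
    error = shmem_add_to_page_cache(folio, mapping, index, NULL,
                                    gfp & GFP_RECLAIM_MASK, charge_mm);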
Link: https://lkml.kernel.org/r/20220504182857.4013401-19-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 57 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 6cba5fd9b694..2e800d4e0d03 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -696,36 +696,35 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, /* * Like add_to_page_cache_locked, but error if expected item has gone. */ -static int shmem_add_to_page_cache(struct page *page, +static int shmem_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t index, void *expected, gfp_t gfp, struct mm_struct *charge_mm) { - XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page)); - unsigned long nr = compound_nr(page); + XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio)); + long nr = folio_nr_pages(folio); int error; - VM_BUG_ON_PAGE(PageTail(page), page); - VM_BUG_ON_PAGE(index != round_down(index, nr), page); - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(!PageSwapBacked(page), page); - VM_BUG_ON(expected && PageTransHuge(page)); + VM_BUG_ON_FOLIO(index != round_down(index, nr), folio); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio); + VM_BUG_ON(expected && folio_test_large(folio)); - page_ref_add(page, nr); - page->mapping = mapping; - page->index = index; + folio_ref_add(folio, nr); + folio->mapping = mapping; + folio->index = index; - if (!PageSwapCache(page)) { - error = mem_cgroup_charge(page_folio(page), charge_mm, gfp); + if (!folio_test_swapcache(folio)) { + error = mem_cgroup_charge(folio, charge_mm, gfp); if (error) { - if (PageTransHuge(page)) { + if (folio_test_pmd_mappable(folio)) { count_vm_event(THP_FILE_FALLBACK); count_vm_event(THP_FILE_FALLBACK_CHARGE); } goto error; } } - cgroup_throttle_swaprate(page, gfp); + folio_throttle_swaprate(folio, gfp); do { xas_lock_irq(&xas); @@ -737,16 +736,16 @@ static int shmem_add_to_page_cache(struct page *page, xas_set_err(&xas, -EEXIST); goto unlock; } - xas_store(&xas, page); + xas_store(&xas, folio); if (xas_error(&xas)) goto unlock; - if (PageTransHuge(page)) { + if (folio_test_pmd_mappable(folio)) { count_vm_event(THP_FILE_ALLOC); - __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr); + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); } mapping->nrpages += nr; - __mod_lruvec_page_state(page, NR_FILE_PAGES, nr); - __mod_lruvec_page_state(page, NR_SHMEM, nr); + __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr); + __lruvec_stat_mod_folio(folio, NR_SHMEM, nr); unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); @@ -758,8 +757,8 @@ unlock: return 0; error: - page->mapping = NULL; - page_ref_sub(page, nr); + folio->mapping = NULL; + folio_ref_sub(folio, nr); return error; } @@ -1691,7 +1690,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct mm_struct *charge_mm = vma ? 
vma->vm_mm : NULL; - struct page *page; + struct page *page = NULL; + struct folio *folio; swp_entry_t swap; int error; @@ -1741,7 +1741,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, goto failed; } - error = shmem_add_to_page_cache(page, mapping, index, + folio = page_folio(page); + error = shmem_add_to_page_cache(folio, mapping, index, swp_to_radix_entry(swap), gfp, charge_mm); if (error) @@ -1792,6 +1793,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo; struct mm_struct *charge_mm; + struct folio *folio; struct page *page; pgoff_t hindex = index; gfp_t huge_gfp; @@ -1906,7 +1908,8 @@ alloc_nohuge: if (sgp == SGP_WRITE) __SetPageReferenced(page); - error = shmem_add_to_page_cache(page, mapping, hindex, + folio = page_folio(page); + error = shmem_add_to_page_cache(folio, mapping, hindex, NULL, gfp & GFP_RECLAIM_MASK, charge_mm); if (error) @@ -2328,6 +2331,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, gfp_t gfp = mapping_gfp_mask(mapping); pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); void *page_kaddr; + struct folio *folio; struct page *page; int ret; pgoff_t max_off; @@ -2386,7 +2390,8 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (unlikely(pgoff >= max_off)) goto out_release; - ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, + folio = page_folio(page); + ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp & GFP_RECLAIM_MASK, dst_mm); if (ret) goto out_release; -- cgit v1.2.3-59-g8ed1b From 069d849cde3a02c075548ac68a43ed97fc95ee34 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:04 -0700 Subject: mm/shmem: turn shmem_should_replace_page into shmem_should_replace_folio This is a straightforward conversion. Link: https://lkml.kernel.org/r/20220504182857.4013401-20-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 2e800d4e0d03..d11dc37d332f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1601,9 +1601,9 @@ failed: * NUMA mempolicy, and applied also to anonymous pages in do_swap_page(); * but for now it is a simple matter of zone. */ -static bool shmem_should_replace_page(struct page *page, gfp_t gfp) +static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp) { - return page_zonenum(page) > gfp_zone(gfp); + return folio_zonenum(folio) > gfp_zone(gfp); } static int shmem_replace_page(struct page **pagep, gfp_t gfp, @@ -1735,13 +1735,13 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, */ arch_swap_restore(swap, page); - if (shmem_should_replace_page(page, gfp)) { + folio = page_folio(page); + if (shmem_should_replace_folio(folio, gfp)) { error = shmem_replace_page(&page, gfp, info, index); if (error) goto failed; } - folio = page_folio(page); error = shmem_add_to_page_cache(folio, mapping, index, swp_to_radix_entry(swap), gfp, charge_mm); -- cgit v1.2.3-59-g8ed1b From 0c023ef52d769ea064df2c86bcdf29cbedb0a9b7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:04 -0700 Subject: mm/shmem: add shmem_alloc_folio() Call vma_alloc_folio() directly instead of alloc_page_vma(). Add a shmem_alloc_page() wrapper to avoid changing the callers. 
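From the callers' point of view (a sketch of the two call shapes, not new code): page-based code keeps using the wrapper, while folio-aware code calls the new helper directly:

    /* unconverted caller: still gets a struct page */
    page = shmem_alloc_page(gfp, info, index);

    /* converted caller: works on the folio and skips the wrapper */
    folio = shmem_alloc_folio(gfp, info, index);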
Link: https://lkml.kernel.org/r/20220504182857.4013401-21-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index d11dc37d332f..668d054728bc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1544,17 +1544,23 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, return &folio->page; } -static struct page *shmem_alloc_page(gfp_t gfp, +static struct folio *shmem_alloc_folio(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; - struct page *page; + struct folio *folio; shmem_pseudo_vma_init(&pvma, info, index); - page = alloc_page_vma(gfp, &pvma, 0); + folio = vma_alloc_folio(gfp, 0, &pvma, 0, false); shmem_pseudo_vma_destroy(&pvma); - return page; + return folio; +} + +static struct page *shmem_alloc_page(gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) +{ + return &shmem_alloc_folio(gfp, info, index)->page; } static struct page *shmem_alloc_and_acct_page(gfp_t gfp, -- cgit v1.2.3-59-g8ed1b From 72827e5c2bcb86d56f8a8aa78fde0085d8535567 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:04 -0700 Subject: mm/shmem: convert shmem_alloc_and_acct_page to use a folio Convert shmem_alloc_hugepage() to return the folio that it uses and use a folio throughout shmem_alloc_and_acct_page(). Continue to return a page from shmem_alloc_and_acct_page() for now. Link: https://lkml.kernel.org/r/20220504182857.4013401-22-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 668d054728bc..d5b23932357d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1523,7 +1523,7 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp) return result; } -static struct page *shmem_alloc_hugepage(gfp_t gfp, +static struct folio *shmem_alloc_hugefolio(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; @@ -1541,7 +1541,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, shmem_pseudo_vma_destroy(&pvma); if (!folio) count_vm_event(THP_FILE_FALLBACK); - return &folio->page; + return folio; } static struct folio *shmem_alloc_folio(gfp_t gfp, @@ -1568,7 +1568,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, pgoff_t index, bool huge) { struct shmem_inode_info *info = SHMEM_I(inode); - struct page *page; + struct folio *folio; int nr; int err = -ENOSPC; @@ -1580,13 +1580,13 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, goto failed; if (huge) - page = shmem_alloc_hugepage(gfp, info, index); + folio = shmem_alloc_hugefolio(gfp, info, index); else - page = shmem_alloc_page(gfp, info, index); - if (page) { - __SetPageLocked(page); - __SetPageSwapBacked(page); - return page; + folio = shmem_alloc_folio(gfp, info, index); + if (folio) { + __folio_set_locked(folio); + __folio_set_swapbacked(folio); + return &folio->page; } err = -ENOMEM; -- cgit v1.2.3-59-g8ed1b From b1d0ec3a9a250b2d5ddd790fdaa2245432a903a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:05 -0700 Subject: mm/shmem: convert shmem_getpage_gfp to use a folio Rename shmem_alloc_and_acct_page() to shmem_alloc_and_acct_folio() and have it return a folio, then use a folio 
throuughout shmem_getpage_gfp(). It continues to return a struct page. Link: https://lkml.kernel.org/r/20220504182857.4013401-23-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 95 ++++++++++++++++++++++++++++---------------------------------- 1 file changed, 43 insertions(+), 52 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index d5b23932357d..edcaba7c67ff 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1563,8 +1563,7 @@ static struct page *shmem_alloc_page(gfp_t gfp, return &shmem_alloc_folio(gfp, info, index)->page; } -static struct page *shmem_alloc_and_acct_page(gfp_t gfp, - struct inode *inode, +static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode, pgoff_t index, bool huge) { struct shmem_inode_info *info = SHMEM_I(inode); @@ -1586,7 +1585,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, if (folio) { __folio_set_locked(folio); __folio_set_swapbacked(folio); - return &folio->page; + return folio; } err = -ENOMEM; @@ -1800,7 +1799,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct shmem_sb_info *sbinfo; struct mm_struct *charge_mm; struct folio *folio; - struct page *page; pgoff_t hindex = index; gfp_t huge_gfp; int error; @@ -1818,19 +1816,18 @@ repeat: sbinfo = SHMEM_SB(inode->i_sb); charge_mm = vma ? vma->vm_mm : NULL; - page = pagecache_get_page(mapping, index, - FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0); - - if (page && vma && userfaultfd_minor(vma)) { - if (!xa_is_value(page)) { - unlock_page(page); - put_page(page); + folio = __filemap_get_folio(mapping, index, FGP_ENTRY | FGP_LOCK, 0); + if (folio && vma && userfaultfd_minor(vma)) { + if (!xa_is_value(folio)) { + folio_unlock(folio); + folio_put(folio); } *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); return 0; } - if (xa_is_value(page)) { + if (xa_is_value(folio)) { + struct page *page = &folio->page; error = shmem_swapin_page(inode, index, &page, sgp, gfp, vma, fault_type); if (error == -EEXIST) @@ -1840,17 +1837,17 @@ repeat: return error; } - if (page) { - hindex = page->index; + if (folio) { + hindex = folio->index; if (sgp == SGP_WRITE) - mark_page_accessed(page); - if (PageUptodate(page)) + folio_mark_accessed(folio); + if (folio_test_uptodate(folio)) goto out; /* fallocated page */ if (sgp != SGP_READ) goto clear; - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } /* @@ -1877,17 +1874,16 @@ repeat: huge_gfp = vma_thp_gfp_mask(vma); huge_gfp = limit_gfp_mask(huge_gfp, gfp); - page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true); - if (IS_ERR(page)) { + folio = shmem_alloc_and_acct_folio(huge_gfp, inode, index, true); + if (IS_ERR(folio)) { alloc_nohuge: - page = shmem_alloc_and_acct_page(gfp, inode, - index, false); + folio = shmem_alloc_and_acct_folio(gfp, inode, index, false); } - if (IS_ERR(page)) { + if (IS_ERR(folio)) { int retry = 5; - error = PTR_ERR(page); - page = NULL; + error = PTR_ERR(folio); + folio = NULL; if (error != -ENOSPC) goto unlock; /* @@ -1906,30 +1902,26 @@ alloc_nohuge: goto unlock; } - if (PageTransHuge(page)) - hindex = round_down(index, HPAGE_PMD_NR); - else - hindex = index; + hindex = round_down(index, folio_nr_pages(folio)); if (sgp == SGP_WRITE) - __SetPageReferenced(page); + __folio_set_referenced(folio); - folio = page_folio(page); error = shmem_add_to_page_cache(folio, mapping, hindex, NULL, gfp & GFP_RECLAIM_MASK, charge_mm); if (error) goto unacct; - 
lru_cache_add(page); + folio_add_lru(folio); spin_lock_irq(&info->lock); - info->alloced += compound_nr(page); - inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); + info->alloced += folio_nr_pages(folio); + inode->i_blocks += BLOCKS_PER_PAGE << folio_order(folio); shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); alloced = true; - if (PageTransHuge(page) && + if (folio_test_pmd_mappable(folio) && DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < hindex + HPAGE_PMD_NR - 1) { /* @@ -1960,22 +1952,21 @@ clear: * but SGP_FALLOC on a page fallocated earlier must initialize * it now, lest undo on failure cancel our earlier guarantee. */ - if (sgp != SGP_WRITE && !PageUptodate(page)) { - int i; + if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) { + long i, n = folio_nr_pages(folio); - for (i = 0; i < compound_nr(page); i++) { - clear_highpage(page + i); - flush_dcache_page(page + i); - } - SetPageUptodate(page); + for (i = 0; i < n; i++) + clear_highpage(folio_page(folio, i)); + flush_dcache_folio(folio); + folio_mark_uptodate(folio); } /* Perhaps the file has been truncated since we checked */ if (sgp <= SGP_CACHE && ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { if (alloced) { - ClearPageDirty(page); - delete_from_page_cache(page); + folio_clear_dirty(folio); + filemap_remove_folio(folio); spin_lock_irq(&info->lock); shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); @@ -1984,24 +1975,24 @@ clear: goto unlock; } out: - *pagep = page + index - hindex; + *pagep = folio_page(folio, index - hindex); return 0; /* * Error recovery. */ unacct: - shmem_inode_unacct_blocks(inode, compound_nr(page)); + shmem_inode_unacct_blocks(inode, folio_nr_pages(folio)); - if (PageTransHuge(page)) { - unlock_page(page); - put_page(page); + if (folio_test_large(folio)) { + folio_unlock(folio); + folio_put(folio); goto alloc_nohuge; } unlock: - if (page) { - unlock_page(page); - put_page(page); + if (folio) { + folio_unlock(folio); + folio_put(folio); } if (error == -ENOSPC && !once++) { spin_lock_irq(&info->lock); -- cgit v1.2.3-59-g8ed1b From da08e9b7932345aff0a748c55f8a25709505f2a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 May 2022 20:23:05 -0700 Subject: mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio() shmem_swapin_page() only brings in order-0 pages, which are folios by definition. 
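A small illustration of that point (an assumption-level sketch for an ordinary order-0 page, not part of the patch): the folio and its single page describe the same memory, so the conversion is a change of type rather than of behaviour:

    struct folio *folio = page_folio(page);    /* same object, folio view */

    VM_BUG_ON_FOLIO(folio_nr_pages(folio) != 1, folio);    /* order-0: exactly one page */
    VM_BUG_ON_PAGE(folio_page(folio, 0) != page, page);    /* round-trips back to the page */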
Link: https://lkml.kernel.org/r/20220504182857.4013401-24-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- arch/arm64/include/asm/pgtable.h | 6 +-- include/linux/pgtable.h | 2 +- mm/shmem.c | 110 ++++++++++++++++++--------------------- 3 files changed, 55 insertions(+), 63 deletions(-) (limited to 'mm/shmem.c') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3278c6c0d873..8ebf1cec5d90 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -981,10 +981,10 @@ static inline void arch_swap_invalidate_area(int type) } #define __HAVE_ARCH_SWAP_RESTORE -static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { - if (system_supports_mte() && mte_restore_tags(entry, page)) - set_bit(PG_mte_tagged, &page->flags); + if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) + set_bit(PG_mte_tagged, &folio->flags); } #endif /* CONFIG_ARM64_MTE */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 530b1817b58c..39fa93e94b80 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -758,7 +758,7 @@ static inline void arch_swap_invalidate_area(int type) #endif #ifndef __HAVE_ARCH_SWAP_RESTORE -static inline void arch_swap_restore(swp_entry_t entry, struct page *page) +static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { } #endif diff --git a/mm/shmem.c b/mm/shmem.c index edcaba7c67ff..29701be579f8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -135,8 +135,8 @@ static unsigned long shmem_default_max_inodes(void) } #endif -static int shmem_swapin_page(struct inode *inode, pgoff_t index, - struct page **pagep, enum sgp_type sgp, +static int shmem_swapin_folio(struct inode *inode, pgoff_t index, + struct folio **foliop, enum sgp_type sgp, gfp_t gfp, struct vm_area_struct *vma, vm_fault_t *fault_type); static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, @@ -1159,69 +1159,64 @@ static void shmem_evict_inode(struct inode *inode) } static int shmem_find_swap_entries(struct address_space *mapping, - pgoff_t start, unsigned int nr_entries, - struct page **entries, pgoff_t *indices, - unsigned int type) + pgoff_t start, struct folio_batch *fbatch, + pgoff_t *indices, unsigned int type) { XA_STATE(xas, &mapping->i_pages, start); - struct page *page; + struct folio *folio; swp_entry_t entry; unsigned int ret = 0; - if (!nr_entries) - return 0; - rcu_read_lock(); - xas_for_each(&xas, page, ULONG_MAX) { - if (xas_retry(&xas, page)) + xas_for_each(&xas, folio, ULONG_MAX) { + if (xas_retry(&xas, folio)) continue; - if (!xa_is_value(page)) + if (!xa_is_value(folio)) continue; - entry = radix_to_swp_entry(page); + entry = radix_to_swp_entry(folio); if (swp_type(entry) != type) continue; indices[ret] = xas.xa_index; - entries[ret] = page; + if (!folio_batch_add(fbatch, folio)) + break; if (need_resched()) { xas_pause(&xas); cond_resched_rcu(); } - if (++ret == nr_entries) - break; } rcu_read_unlock(); - return ret; + return xas.xa_index; } /* * Move the swapped pages for an inode to page cache. Returns the count * of pages swapped in, or the error in case of failure. 
*/ -static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, - pgoff_t *indices) +static int shmem_unuse_swap_entries(struct inode *inode, + struct folio_batch *fbatch, pgoff_t *indices) { int i = 0; int ret = 0; int error = 0; struct address_space *mapping = inode->i_mapping; - for (i = 0; i < pvec.nr; i++) { - struct page *page = pvec.pages[i]; + for (i = 0; i < folio_batch_count(fbatch); i++) { + struct folio *folio = fbatch->folios[i]; - if (!xa_is_value(page)) + if (!xa_is_value(folio)) continue; - error = shmem_swapin_page(inode, indices[i], - &page, SGP_CACHE, + error = shmem_swapin_folio(inode, indices[i], + &folio, SGP_CACHE, mapping_gfp_mask(mapping), NULL, NULL); if (error == 0) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); ret++; } if (error == -ENOMEM) @@ -1238,26 +1233,23 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type) { struct address_space *mapping = inode->i_mapping; pgoff_t start = 0; - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; int ret = 0; - pagevec_init(&pvec); do { - unsigned int nr_entries = PAGEVEC_SIZE; - - pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, - pvec.pages, indices, type); - if (pvec.nr == 0) { + folio_batch_init(&fbatch); + shmem_find_swap_entries(mapping, start, &fbatch, indices, type); + if (folio_batch_count(&fbatch) == 0) { ret = 0; break; } - ret = shmem_unuse_swap_entries(inode, pvec, indices); + ret = shmem_unuse_swap_entries(inode, &fbatch, indices); if (ret < 0) break; - start = indices[pvec.nr - 1]; + start = indices[folio_batch_count(&fbatch) - 1]; } while (true); return ret; @@ -1687,22 +1679,22 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, * Returns 0 and the page in pagep if success. On failure, returns the * error code and NULL in *pagep. */ -static int shmem_swapin_page(struct inode *inode, pgoff_t index, - struct page **pagep, enum sgp_type sgp, +static int shmem_swapin_folio(struct inode *inode, pgoff_t index, + struct folio **foliop, enum sgp_type sgp, gfp_t gfp, struct vm_area_struct *vma, vm_fault_t *fault_type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL; - struct page *page = NULL; - struct folio *folio; + struct page *page; + struct folio *folio = NULL; swp_entry_t swap; int error; - VM_BUG_ON(!*pagep || !xa_is_value(*pagep)); - swap = radix_to_swp_entry(*pagep); - *pagep = NULL; + VM_BUG_ON(!*foliop || !xa_is_value(*foliop)); + swap = radix_to_swp_entry(*foliop); + *foliop = NULL; /* Look it up and read it in.. */ page = lookup_swap_cache(swap, NULL, 0); @@ -1720,27 +1712,28 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, goto failed; } } + folio = page_folio(page); /* We have to do this with page locked to prevent races */ - lock_page(page); - if (!PageSwapCache(page) || page_private(page) != swap.val || + folio_lock(folio); + if (!folio_test_swapcache(folio) || + folio_swap_entry(folio).val != swap.val || !shmem_confirm_swap(mapping, index, swap)) { error = -EEXIST; goto unlock; } - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { error = -EIO; goto failed; } - wait_on_page_writeback(page); + folio_wait_writeback(folio); /* * Some architectures may have to restore extra metadata to the - * physical page after reading from swap. + * folio after reading from swap. 
*/ - arch_swap_restore(swap, page); + arch_swap_restore(swap, folio); - folio = page_folio(page); if (shmem_should_replace_folio(folio, gfp)) { error = shmem_replace_page(&page, gfp, info, index); if (error) @@ -1759,21 +1752,21 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, spin_unlock_irq(&info->lock); if (sgp == SGP_WRITE) - mark_page_accessed(page); + folio_mark_accessed(folio); - delete_from_swap_cache(page); - set_page_dirty(page); + delete_from_swap_cache(&folio->page); + folio_mark_dirty(folio); swap_free(swap); - *pagep = page; + *foliop = folio; return 0; failed: if (!shmem_confirm_swap(mapping, index, swap)) error = -EEXIST; unlock: - if (page) { - unlock_page(page); - put_page(page); + if (folio) { + folio_unlock(folio); + folio_put(folio); } return error; @@ -1827,13 +1820,12 @@ repeat: } if (xa_is_value(folio)) { - struct page *page = &folio->page; - error = shmem_swapin_page(inode, index, &page, + error = shmem_swapin_folio(inode, index, &folio, sgp, gfp, vma, fault_type); if (error == -EEXIST) goto repeat; - *pagep = page; + *pagep = &folio->page; return error; } -- cgit v1.2.3-59-g8ed1b From c791576c60288c89b351ea2d2098f6a872d78fa7 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 19 May 2022 14:08:50 -0700 Subject: mm: khugepaged: introduce khugepaged_enter_vma() helper The khugepaged_enter_vma_merge() actually does as the same thing as the khugepaged_enter() section called by shmem_mmap(), so consolidate them into one helper and rename it to khugepaged_enter_vma(). Link: https://lkml.kernel.org/r/20220510203222.24246-8-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: Vlastmil Babka Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Rik van Riel Cc: Song Liu Cc: Song Liu Cc: Theodore Ts'o Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/khugepaged.h | 8 ++++---- mm/khugepaged.c | 6 +++--- mm/mmap.c | 8 ++++---- mm/shmem.c | 12 ++---------- 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'mm/shmem.c') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index c340f6bb39d6..392d34c3c59a 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -14,8 +14,8 @@ extern bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags); extern void __khugepaged_enter(struct mm_struct *mm); extern void __khugepaged_exit(struct mm_struct *mm); -extern void khugepaged_enter_vma_merge(struct vm_area_struct *vma, - unsigned long vm_flags); +extern void khugepaged_enter_vma(struct vm_area_struct *vma, + unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); #ifdef CONFIG_SHMEM extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr); @@ -72,8 +72,8 @@ static inline void khugepaged_enter(struct vm_area_struct *vma, unsigned long vm_flags) { } -static inline void khugepaged_enter_vma_merge(struct vm_area_struct *vma, - unsigned long vm_flags) +static inline void khugepaged_enter_vma(struct vm_area_struct *vma, + unsigned long vm_flags) { } static inline void collapse_pte_mapped_thp(struct mm_struct *mm, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 9ef626e2c750..16be62d493cd 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -365,7 +365,7 @@ int hugepage_madvise(struct vm_area_struct *vma, * register it here without waiting a page fault that * may not happen any time soon. 
*/ - khugepaged_enter_vma_merge(vma, *vm_flags); + khugepaged_enter_vma(vma, *vm_flags); break; case MADV_NOHUGEPAGE: *vm_flags &= ~VM_HUGEPAGE; @@ -505,8 +505,8 @@ void __khugepaged_enter(struct mm_struct *mm) wake_up_interruptible(&khugepaged_wait); } -void khugepaged_enter_vma_merge(struct vm_area_struct *vma, - unsigned long vm_flags) +void khugepaged_enter_vma(struct vm_area_struct *vma, + unsigned long vm_flags) { if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) && khugepaged_enabled() && diff --git a/mm/mmap.c b/mm/mmap.c index 7f7d982721b9..4456bf81e11e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1223,7 +1223,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, end, prev->vm_pgoff, NULL, prev); if (err) return NULL; - khugepaged_enter_vma_merge(prev, vm_flags); + khugepaged_enter_vma(prev, vm_flags); return prev; } @@ -1250,7 +1250,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, } if (err) return NULL; - khugepaged_enter_vma_merge(area, vm_flags); + khugepaged_enter_vma(area, vm_flags); return area; } @@ -2450,7 +2450,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) } } anon_vma_unlock_write(vma->anon_vma); - khugepaged_enter_vma_merge(vma, vma->vm_flags); + khugepaged_enter_vma(vma, vma->vm_flags); validate_mm(mm); return error; } @@ -2528,7 +2528,7 @@ int expand_downwards(struct vm_area_struct *vma, } } anon_vma_unlock_write(vma->anon_vma); - khugepaged_enter_vma_merge(vma, vma->vm_flags); + khugepaged_enter_vma(vma, vma->vm_flags); validate_mm(mm); return error; } diff --git a/mm/shmem.c b/mm/shmem.c index 29701be579f8..89f6f4fec3f9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2232,11 +2232,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &shmem_vm_ops; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < - (vma->vm_end & HPAGE_PMD_MASK)) { - khugepaged_enter(vma, vma->vm_flags); - } + khugepaged_enter_vma(vma, vma->vm_flags); return 0; } @@ -4137,11 +4133,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) vma->vm_file = file; vma->vm_ops = &shmem_vm_ops; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - ((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) < - (vma->vm_end & HPAGE_PMD_MASK)) { - khugepaged_enter(vma, vma->vm_flags); - } + khugepaged_enter_vma(vma, vma->vm_flags); return 0; } -- cgit v1.2.3-59-g8ed1b From 613bec092fe78307a8b130353ce1ef340915587f Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 19 May 2022 14:08:50 -0700 Subject: mm: mmap: register suitable readonly file vmas for khugepaged The readonly FS THP relies on khugepaged to collapse THP for suitable vmas. But the behavior is inconsistent for "always" mode (https://lore.kernel.org/linux-mm/00f195d4-d039-3cf2-d3a1-a2c88de397a0@suse.cz/). The "always" mode means THP allocation should be tried all the time and khugepaged should try to collapse THP all the time. Of course the allocation and collapse may fail due to other factors and conditions. Currently file THP may not be collapsed by khugepaged even though all the conditions are met. That does break the semantics of "always" mode. So make sure readonly FS vmas are registered to khugepaged to fix the break. Register suitable vmas in common mmap path, that could cover both readonly FS vmas and shmem vmas, so remove the khugepaged calls in shmem.c. Still need to keep the khugepaged call in vma_merge() since vma_merge() is called in a lot of places, for example, madvise, mprotect, etc. 
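As a stand-alone illustration of the behaviour being restored (not part of the patch), the sketch below maps a file read-only and executable, hints it with MADV_HUGEPAGE, and then greps /proc/self/smaps for FilePmdMapped, the per-VMA counter of PMD-mapped file pages. It assumes a kernel built with CONFIG_READ_ONLY_THP_FOR_FS and a running khugepaged; because collapse is asynchronous and subject to the THP sysfs mode, a zero counter is expected in many configurations and does not by itself indicate a problem.

/* thp-file-check.c: hypothetical demo, sketch only */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	struct stat st;
	char line[256];
	FILE *smaps;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(argv[1]);
		return 1;
	}

	/* Read-only, executable file mapping: the case targeted by
	 * CONFIG_READ_ONLY_THP_FOR_FS (e.g. program text). */
	char *p = mmap(NULL, st.st_size, PROT_READ | PROT_EXEC,
		       MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Hint the VMA; with this series applied, "always" mode should not
	 * need the hint for khugepaged to consider the VMA. */
	if (madvise(p, st.st_size, MADV_HUGEPAGE) < 0)
		perror("madvise");

	/* Fault the pages in so there is something to collapse. */
	volatile char sum = 0;
	for (off_t off = 0; off < st.st_size; off += 4096)
		sum += p[off];

	sleep(30);	/* collapse is asynchronous; give khugepaged time */

	smaps = fopen("/proc/self/smaps", "r");
	while (smaps && fgets(line, sizeof(line), smaps))
		if (strstr(line, "FilePmdMapped:"))
			fputs(line, stdout);
	if (smaps)
		fclose(smaps);

	munmap(p, st.st_size);
	close(fd);
	return 0;
}

With the patch, such a VMA is registered with khugepaged from the common mmap path, so the "always" mode semantics hold even when vma_merge() is never involved.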
Link: https://lkml.kernel.org/r/20220510203222.24246-9-shy828301@gmail.com Signed-off-by: Yang Shi Reported-by: Vlastimil Babka Acked-by: Vlastmil Babka Cc: Kirill A. Shutemov Cc: Miaohe Lin Cc: Song Liu Cc: Rik van Riel Cc: Matthew Wilcox (Oracle) Cc: Zi Yan Cc: Theodore Ts'o Cc: Song Liu Signed-off-by: Andrew Morton --- mm/mmap.c | 7 +++++++ mm/shmem.c | 4 ---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/mmap.c b/mm/mmap.c index 4456bf81e11e..2b9305ed0dda 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1847,6 +1847,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr, } vma_link(mm, vma, prev, rb_link, rb_parent); + + /* + * vma_merge() calls khugepaged_enter_vma() either, the below + * call covers the non-merge case. + */ + khugepaged_enter_vma(vma, vma->vm_flags); + /* Once vma denies write, undo our temporary denial count */ unmap_writable: if (file && vm_flags & VM_SHARED) diff --git a/mm/shmem.c b/mm/shmem.c index 89f6f4fec3f9..67a3f3b05fb2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -2232,7 +2231,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &shmem_vm_ops; - khugepaged_enter_vma(vma, vma->vm_flags); return 0; } @@ -4133,8 +4131,6 @@ int shmem_zero_setup(struct vm_area_struct *vma) vma->vm_file = file; vma->vm_ops = &shmem_vm_ops; - khugepaged_enter_vma(vma, vma->vm_flags); - return 0; } -- cgit v1.2.3-59-g8ed1b From d14f5efadd846dbde561bd734318de6a9e6b26e6 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Fri, 13 May 2022 10:52:25 +0800 Subject: tmpfs: fix undefined-behaviour in shmem_reconfigure() When shmem_reconfigure() calls __percpu_counter_compare(), the second parameter is unsigned long long. But in the definition of __percpu_counter_compare(), the second parameter is s64. 
So when __percpu_counter_compare() executes abs(count - rhs), UBSAN shows the following warning: ================================================================================ UBSAN: Undefined behaviour in lib/percpu_counter.c:209:6 signed integer overflow: 0 - -9223372036854775808 cannot be represented in type 'long long int' CPU: 1 PID: 9636 Comm: syz-executor.2 Tainted: G ---------r- - 4.18.0 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack home/install/linux-rh-3-10/lib/dump_stack.c:77 [inline] dump_stack+0x125/0x1ae home/install/linux-rh-3-10/lib/dump_stack.c:117 ubsan_epilogue+0xe/0x81 home/install/linux-rh-3-10/lib/ubsan.c:159 handle_overflow+0x19d/0x1ec home/install/linux-rh-3-10/lib/ubsan.c:190 __percpu_counter_compare+0x124/0x140 home/install/linux-rh-3-10/lib/percpu_counter.c:209 percpu_counter_compare home/install/linux-rh-3-10/./include/linux/percpu_counter.h:50 [inline] shmem_remount_fs+0x1ce/0x6b0 home/install/linux-rh-3-10/mm/shmem.c:3530 do_remount_sb+0x11b/0x530 home/install/linux-rh-3-10/fs/super.c:888 do_remount home/install/linux-rh-3-10/fs/namespace.c:2344 [inline] do_mount+0xf8d/0x26b0 home/install/linux-rh-3-10/fs/namespace.c:2844 ksys_mount+0xad/0x120 home/install/linux-rh-3-10/fs/namespace.c:3075 __do_sys_mount home/install/linux-rh-3-10/fs/namespace.c:3089 [inline] __se_sys_mount home/install/linux-rh-3-10/fs/namespace.c:3086 [inline] __x64_sys_mount+0xbf/0x160 home/install/linux-rh-3-10/fs/namespace.c:3086 do_syscall_64+0xca/0x5c0 home/install/linux-rh-3-10/arch/x86/entry/common.c:298 entry_SYSCALL_64_after_hwframe+0x6a/0xdf RIP: 0033:0x46b5e9 Code: 5d db fa ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b db fa ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f54d5f22c68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 000000000077bf60 RCX: 000000000046b5e9 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000000 RBP: 000000000077bf60 R08: 0000000020000140 R09: 0000000000000000 R10: 00000000026740a4 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd1fb1592f R14: 00007f54d5f239c0 R15: 000000000077bf6c ================================================================================ [akpm@linux-foundation.org: tweak error message text] Link: https://lkml.kernel.org/r/20220513025225.2678727-1-luomeng12@huawei.com Signed-off-by: Luo Meng Cc: Hugh Dickins Cc: Yu Kuai Signed-off-by: Andrew Morton --- mm/shmem.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 67a3f3b05fb2..7f41a24ed382 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3476,6 +3476,10 @@ static int shmem_reconfigure(struct fs_context *fc) raw_spin_lock(&sbinfo->stat_lock); inodes = sbinfo->max_inodes - sbinfo->free_inodes; + if (ctx->blocks > S64_MAX) { + err = "Number of blocks too large"; + goto out; + } if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; -- cgit v1.2.3-59-g8ed1b From e384200e70664cd690cdadb72b2a1bc9dfcdec1a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 21 May 2022 19:53:04 -0700 Subject: mm/shmem: fix shmem folio swapoff hang Shmem swapoff makes no progress: the index to indices is not incremented. But "ret" is no longer a return value, so use folio_batch_count() instead. 
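The failure mode is easy to see outside the kernel: once the local counter that doubled as the array index is gone, every position is written to slot 0 and the caller's resume index never advances. The reduced sketch below (a hypothetical batch type standing in for struct folio_batch, not the kernel API) shows the fixed scheme, where the growing batch count supplies the index:

#include <stdio.h>

#define BATCH_SIZE 15	/* same spirit as PAGEVEC_SIZE */

struct batch {		/* stand-in for struct folio_batch */
	unsigned int nr;
	int items[BATCH_SIZE];
};

static unsigned int batch_count(const struct batch *b) { return b->nr; }

static int batch_add(struct batch *b, int item)
{
	b->items[b->nr++] = item;
	return b->nr < BATCH_SIZE;	/* 0 means the batch is now full */
}

/* Collect up to BATCH_SIZE non-zero entries at or after start and
 * record their positions in indices[]. */
static void find_entries(const int *table, int len, int start,
			 struct batch *b, unsigned long *indices)
{
	for (int i = start; i < len; i++) {
		if (table[i] == 0)
			continue;
		/* The fix: index with the current batch count, which grows
		 * with every successful add, instead of a stale counter. */
		indices[batch_count(b)] = i;
		if (!batch_add(b, table[i]))
			break;
	}
}

int main(void)
{
	int table[64];
	unsigned long indices[BATCH_SIZE];
	int start = 0;

	for (int i = 0; i < 64; i++)
		table[i] = (i % 3) ? i : 0;

	for (;;) {
		struct batch b = { 0 };

		find_entries(table, 64, start, &b, indices);
		if (batch_count(&b) == 0)
			break;
		printf("processed %u entries up to index %lu\n",
		       batch_count(&b), indices[batch_count(&b) - 1]);
		/* Resume from the last index found, as shmem_unuse_inode()
		 * does; with the bug this was always indices[0], so the
		 * outer loop spun without making progress. */
		start = indices[batch_count(&b) - 1] + 1;
	}
	return 0;
}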
Link: https://lkml.kernel.org/r/c32bee8a-f0aa-245-f94e-24dd271924fa@google.com Fixes: da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()") Signed-off-by: Hugh Dickins Reviewed-by: Miaohe Lin Tested-by: Miaohe Lin Cc: Matthew Wilcox Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- mm/shmem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm/shmem.c') diff --git a/mm/shmem.c b/mm/shmem.c index 7f41a24ed382..b8169beff226 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1164,7 +1164,6 @@ static int shmem_find_swap_entries(struct address_space *mapping, XA_STATE(xas, &mapping->i_pages, start); struct folio *folio; swp_entry_t entry; - unsigned int ret = 0; rcu_read_lock(); xas_for_each(&xas, folio, ULONG_MAX) { @@ -1178,7 +1177,7 @@ static int shmem_find_swap_entries(struct address_space *mapping, if (swp_type(entry) != type) continue; - indices[ret] = xas.xa_index; + indices[folio_batch_count(fbatch)] = xas.xa_index; if (!folio_batch_add(fbatch, folio)) break; -- cgit v1.2.3-59-g8ed1b
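Returning to the tmpfs reconfigure fix above: the root cause is that a block count parsed as unsigned long long is later treated as s64, so any value above S64_MAX becomes negative and abs(count - rhs) in __percpu_counter_compare() can overflow. A minimal stand-alone sketch of the added guard (plain C, hypothetical helper name, not the kernel code):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define S64_MAX INT64_MAX	/* kernel spelling of the limit */

/*
 * Stand-in for the check added to shmem_reconfigure(): a block count
 * parsed from the mount options as unsigned long long must be validated
 * before it is used as an s64 (e.g. by percpu_counter_compare()).
 */
static const char *validate_blocks(unsigned long long blocks)
{
	if (blocks > S64_MAX)
		return "Number of blocks too large";
	return NULL;	/* safe to use as a signed 64-bit count */
}

int main(void)
{
	/* 2^63, the smallest value that no longer fits in s64 */
	unsigned long long bad = 9223372036854775808ULL;
	unsigned long long good = 1ULL << 20;
	const char *err;

	err = validate_blocks(bad);
	printf("%llu -> %s\n", bad, err ? err : "ok");

	err = validate_blocks(good);
	printf("%llu -> %s\n", good, err ? err : "ok");

	/*
	 * Without the check, the conversion to a signed 64-bit type is
	 * value-changing (implementation-defined before C23); 2^63
	 * typically becomes INT64_MIN, the value seen in the UBSAN
	 * report quoted above.
	 */
	printf("as int64_t: %" PRId64 "\n", (int64_t)bad);
	return 0;
}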