From d68eccad370665830e16e5c77611fde78cd749b3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 5 Sep 2019 14:03:12 -0400
Subject: mm/filemap: Allow large folios to be added to the page cache

We return -EEXIST if there are any non-shadow entries in the page cache
in the range covered by the folio.  If there are multiple shadow entries
in the range, we set *shadowp to one of them (currently the one at the
highest index).  If that turns out to be the wrong answer, we can
implement something more complex.  This is mostly modelled after the
equivalent function in the shmem code.

Signed-off-by: Matthew Wilcox (Oracle)
---
 mm/filemap.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

(limited to 'mm/filemap.c')

diff --git a/mm/filemap.c b/mm/filemap.c
index ad8c39d90bf9..8f7ac3de9098 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -842,26 +842,27 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 {
 	XA_STATE(xas, &mapping->i_pages, index);
 	int huge = folio_test_hugetlb(folio);
-	int error;
 	bool charged = false;
+	long nr = 1;
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
 	mapping_set_update(&xas, mapping);
 
-	folio_get(folio);
-	folio->mapping = mapping;
-	folio->index = index;
-
 	if (!huge) {
-		error = mem_cgroup_charge(folio, NULL, gfp);
+		int error = mem_cgroup_charge(folio, NULL, gfp);
 		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
 		if (error)
-			goto error;
+			return error;
 		charged = true;
+		xas_set_order(&xas, index, folio_order(folio));
+		nr = folio_nr_pages(folio);
 	}
 
 	gfp &= GFP_RECLAIM_MASK;
+	folio_ref_add(folio, nr);
+	folio->mapping = mapping;
+	folio->index = xas.xa_index;
 	do {
 		unsigned int order = xa_get_order(xas.xa, xas.xa_index);
 
@@ -885,6 +886,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 			/* entry may have been split before we acquired lock */
 			order = xa_get_order(xas.xa, xas.xa_index);
 			if (order > folio_order(folio)) {
+				/* How to handle large swap entries? */
+				BUG_ON(shmem_mapping(mapping));
 				xas_split(&xas, old, order);
 				xas_reset(&xas);
 			}
@@ -894,29 +897,31 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 		if (xas_error(&xas))
 			goto unlock;
 
-		mapping->nrpages++;
+		mapping->nrpages += nr;
 
 		/* hugetlb pages do not participate in page cache accounting */
-		if (!huge)
-			__lruvec_stat_add_folio(folio, NR_FILE_PAGES);
+		if (!huge) {
+			__lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+			if (folio_test_pmd_mappable(folio))
+				__lruvec_stat_mod_folio(folio,
+						NR_FILE_THPS, nr);
+		}
 unlock:
 		xas_unlock_irq(&xas);
 	} while (xas_nomem(&xas, gfp));
 
-	if (xas_error(&xas)) {
-		error = xas_error(&xas);
-		if (charged)
-			mem_cgroup_uncharge(folio);
+	if (xas_error(&xas))
 		goto error;
-	}
 
 	trace_mm_filemap_add_to_page_cache(folio);
 	return 0;
 error:
+	if (charged)
+		mem_cgroup_uncharge(folio);
 	folio->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
-	folio_put(folio);
-	return error;
+	folio_put_refs(folio, nr);
+	return xas_error(&xas);
 }
 ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
-- cgit v1.2.3-59-g8ed1b
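A note on the -EEXIST/shadow-entry behaviour described in the commit message: that logic sits in the unchanged middle of __filemap_add_folio() and is therefore not visible in the hunks above. The following is a rough, paraphrased sketch rather than the patch text; the standalone helper and its name are invented purely for illustration.

#include <linux/xarray.h>

/*
 * Illustrative sketch only: walk every slot that the (possibly large)
 * folio would occupy.  A plain folio anywhere in the range makes the
 * add fail with -EEXIST; if only shadow (value) entries are found,
 * one of them is reported back through *shadowp.
 */
static int scan_range_for_conflicts(struct xa_state *xas, void **shadowp)
{
	void *entry, *old = NULL;

	xas_for_each_conflict(xas, entry) {
		old = entry;
		if (!xa_is_value(entry))
			return -EEXIST;	/* a real folio is already cached here */
	}
	if (old && shadowp)
		*shadowp = old;	/* the last one seen, i.e. the highest index */
	return 0;
}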
From 56a4d67c264e37014b8392cba9869c7fe904ed1e Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Sat, 24 Jul 2021 23:26:14 -0400
Subject: mm/readahead: Switch to page_cache_ra_order

do_page_cache_ra() was being exposed for the benefit of
do_sync_mmap_readahead().  Switch it over to page_cache_ra_order()
partly because it's a better interface but mostly for the benefit of
the next patch.

Signed-off-by: Matthew Wilcox (Oracle)
---
 mm/filemap.c   | 2 +-
 mm/internal.h  | 4 ++--
 mm/readahead.c | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'mm/filemap.c')

diff --git a/mm/filemap.c b/mm/filemap.c
index 8f7ac3de9098..fe764225ae99 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3027,7 +3027,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	ra->size = ra->ra_pages;
 	ra->async_size = ra->ra_pages / 4;
 	ractl._index = ra->start;
-	do_page_cache_ra(&ractl, ra->size, ra->async_size);
+	page_cache_ra_order(&ractl, ra, 0);
 	return fpin;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 2b2c2c4eb63a..293eca1360dc 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -79,8 +79,8 @@ void unmap_page_range(struct mmu_gather *tlb,
 			     unsigned long addr, unsigned long end,
 			     struct zap_details *details);
 
-void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read,
-		unsigned long lookahead_size);
+void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
+		unsigned int order);
 void force_page_cache_ra(struct readahead_control *, unsigned long nr);
 static inline void force_page_cache_readahead(struct address_space *mapping,
 		struct file *file, pgoff_t index, unsigned long nr_to_read)
diff --git a/mm/readahead.c b/mm/readahead.c
index 5100eaf5b0ee..a20391d6a71b 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  */
-void do_page_cache_ra(struct readahead_control *ractl,
+static void do_page_cache_ra(struct readahead_control *ractl,
 		unsigned long nr_to_read, unsigned long lookahead_size)
 {
 	struct inode *inode = ractl->mapping->host;
@@ -462,7 +462,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 	return err;
 }
 
-static void page_cache_ra_order(struct readahead_control *ractl,
+void page_cache_ra_order(struct readahead_control *ractl,
 		struct file_ra_state *ra, unsigned int new_order)
 {
 	struct address_space *mapping = ractl->mapping;

-- cgit v1.2.3-59-g8ed1b
From 4687fdbb805a92ce5a9f23042c436dc64fef8b77 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Sat, 24 Jul 2021 23:37:13 -0400
Subject: mm/filemap: Support VM_HUGEPAGE for file mappings

If the VM_HUGEPAGE flag is set, attempt to allocate PMD-sized folios
during readahead, even if we have no history of readahead being
successful.

Signed-off-by: Matthew Wilcox (Oracle)
---
 mm/filemap.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'mm/filemap.c')

diff --git a/mm/filemap.c b/mm/filemap.c
index fe764225ae99..7608ee030662 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2995,6 +2995,24 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	struct file *fpin = NULL;
 	unsigned int mmap_miss;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* Use the readahead code, even if readahead is disabled */
+	if (vmf->vma->vm_flags & VM_HUGEPAGE) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
+		ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
+		ra->size = HPAGE_PMD_NR;
+		/*
+		 * Fetch two PMD folios, so we get the chance to actually
+		 * readahead, unless we've been told not to.
+		 */
+		if (!(vmf->vma->vm_flags & VM_RAND_READ))
+			ra->size *= 2;
+		ra->async_size = HPAGE_PMD_NR;
+		page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
+		return fpin;
+	}
+#endif
+
 	/* If we don't want any read-ahead, don't bother */
 	if (vmf->vma->vm_flags & VM_RAND_READ)
 		return fpin;

-- cgit v1.2.3-59-g8ed1b
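As a usage illustration (not part of the patch): madvise(MADV_HUGEPAGE) is the usual way a file-backed mapping picks up VM_HUGEPAGE, so a program along the lines of the sketch below would have its first fault go through the PMD-sized readahead path added above, assuming a kernel with CONFIG_TRANSPARENT_HUGEPAGE and a filesystem whose mappings support large folios. The file name is a placeholder and error handling is elided.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* "data.bin" is a placeholder; open/mmap error checks omitted */
	int fd = open("data.bin", O_RDONLY);
	off_t len = lseek(fd, 0, SEEK_END);

	char *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	madvise(p, len, MADV_HUGEPAGE);	/* asks for VM_HUGEPAGE on this VMA */

	/* The first access faults and enters do_sync_mmap_readahead() */
	printf("first byte: %d\n", p[0]);

	munmap(p, len);
	close(fd);
	return 0;
}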