path: root/mm/filemap.c
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	1261
1 file changed, 648 insertions(+), 613 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 767202265048..647d72bf23b6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -21,6 +21,7 @@
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
@@ -34,13 +35,13 @@
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
-#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
+#include <linux/migrate.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -71,7 +72,7 @@
* Lock ordering:
*
* ->i_mmap_rwsem (truncate_pagecache)
- * ->private_lock (__free_pte->__set_page_dirty_buffers)
+ * ->private_lock (__free_pte->block_dirty_folio)
* ->swap_lock (exclusive_swap_page, others)
* ->i_pages lock
*
@@ -114,106 +115,98 @@
* ->memcg->move_lock (page_remove_rmap->lock_page_memcg)
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
- * ->private_lock (zap_pte_range->__set_page_dirty_buffers)
+ * ->private_lock (zap_pte_range->block_dirty_folio)
*
* ->i_mmap_rwsem
* ->tasklist_lock (memory_failure, collect_procs_ao)
*/
static void page_cache_delete(struct address_space *mapping,
- struct page *page, void *shadow)
+ struct folio *folio, void *shadow)
{
- XA_STATE(xas, &mapping->i_pages, page->index);
- unsigned int nr = 1;
+ XA_STATE(xas, &mapping->i_pages, folio->index);
+ long nr = 1;
mapping_set_update(&xas, mapping);
/* hugetlb pages are represented by a single entry in the xarray */
- if (!PageHuge(page)) {
- xas_set_order(&xas, page->index, compound_order(page));
- nr = compound_nr(page);
+ if (!folio_test_hugetlb(folio)) {
+ xas_set_order(&xas, folio->index, folio_order(folio));
+ nr = folio_nr_pages(folio);
}
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(nr != 1 && shadow, page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
xas_store(&xas, shadow);
xas_init_marks(&xas);
- page->mapping = NULL;
+ folio->mapping = NULL;
/* Leave page->index set: truncation lookup relies upon it */
mapping->nrpages -= nr;
}
-static void unaccount_page_cache_page(struct address_space *mapping,
- struct page *page)
+static void filemap_unaccount_folio(struct address_space *mapping,
+ struct folio *folio)
{
- int nr;
-
- /*
- * if we're uptodate, flush out into the cleancache, otherwise
- * invalidate any existing cleancache entries. We can't leave
- * stale data around in the cleancache once our page is gone
- */
- if (PageUptodate(page) && PageMappedToDisk(page))
- cleancache_put_page(page);
- else
- cleancache_invalidate_page(mapping, page);
-
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(page_mapped(page), page);
- if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
- int mapcount;
+ long nr;
+ VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
+ if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
- current->comm, page_to_pfn(page));
- dump_page(page, "still mapped when deleted");
+ current->comm, folio_pfn(folio));
+ dump_page(&folio->page, "still mapped when deleted");
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
- mapcount = page_mapcount(page);
- if (mapping_exiting(mapping) &&
- page_count(page) >= mapcount + 2) {
- /*
- * All vmas have already been torn down, so it's
- * a good bet that actually the page is unmapped,
- * and we'd prefer not to leak it: if we're wrong,
- * some other bad page check should catch it later.
- */
- page_mapcount_reset(page);
- page_ref_sub(page, mapcount);
+ if (mapping_exiting(mapping) && !folio_test_large(folio)) {
+ int mapcount = page_mapcount(&folio->page);
+
+ if (folio_ref_count(folio) >= mapcount + 2) {
+ /*
+ * All vmas have already been torn down, so it's
+ * a good bet that actually the page is unmapped
+ * and we'd rather not leak it: if we're wrong,
+ * another bad page check should catch it later.
+ */
+ page_mapcount_reset(&folio->page);
+ folio_ref_sub(folio, mapcount);
+ }
}
}
- /* hugetlb pages do not participate in page cache accounting. */
- if (PageHuge(page))
+ /* hugetlb folios do not participate in page cache accounting. */
+ if (folio_test_hugetlb(folio))
return;
- nr = thp_nr_pages(page);
+ nr = folio_nr_pages(folio);
- __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
- if (PageSwapBacked(page)) {
- __mod_lruvec_page_state(page, NR_SHMEM, -nr);
- if (PageTransHuge(page))
- __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr);
- } else if (PageTransHuge(page)) {
- __mod_lruvec_page_state(page, NR_FILE_THPS, -nr);
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
+ if (folio_test_swapbacked(folio)) {
+ __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);
+ } else if (folio_test_pmd_mappable(folio)) {
+ __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);
filemap_nr_thps_dec(mapping);
}
/*
- * At this point page must be either written or cleaned by
- * truncate. Dirty page here signals a bug and loss of
- * unwritten data.
+ * At this point folio must be either written or cleaned by
+ * truncate. Dirty folio here signals a bug and loss of
+ * unwritten data - on ordinary filesystems.
+ *
+ * But it's harmless on in-memory filesystems like tmpfs; and can
+ * occur when a driver which did get_user_pages() sets page dirty
+ * before putting it, while the inode is being finally evicted.
*
- * This fixes dirty accounting after removing the page entirely
- * but leaves PageDirty set: it has no effect for truncated
- * page and anyway will be cleared before returning page into
+ * Below fixes dirty accounting after removing the folio entirely
+ * but leaves the dirty flag set: it has no effect for truncated
+ * folio and anyway will be cleared before returning folio to
* buddy allocator.
*/
- if (WARN_ON_ONCE(PageDirty(page)))
- account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
+ if (WARN_ON_ONCE(folio_test_dirty(folio) &&
+ mapping_can_writeback(mapping)))
+ folio_account_cleaned(folio, inode_to_wb(mapping->host));
}
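
The tmpfs/driver caveat in the updated comment is easier to see with a concrete sequence. Below is a minimal sketch of the benign case, assuming a driver using the get_user_pages() family on a tmpfs file; the helper name and flow are illustrative and not part of this patch.

static void example_dirty_after_gup(unsigned long user_addr)
{
	struct page *page;

	/* Hypothetical driver path: pin a tmpfs page for DMA. */
	if (get_user_pages_fast(user_addr, 1, FOLL_WRITE, &page) != 1)
		return;

	/* ... device writes into the page; meanwhile the file is unlinked ... */

	set_page_dirty_lock(page);	/* dirtied after truncate already cleaned it */
	put_page(page);

	/*
	 * Final eviction of the inode now finds a dirty folio, but the
	 * mapping_can_writeback() test added above keeps tmpfs from warning
	 * or adjusting writeback statistics it never held.
	 */
}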
/*
@@ -221,87 +214,81 @@ static void unaccount_page_cache_page(struct address_space *mapping,
* sure the page is locked and that nobody else uses it - or that usage
* is safe. The caller must hold the i_pages lock.
*/
-void __delete_from_page_cache(struct page *page, void *shadow)
+void __filemap_remove_folio(struct folio *folio, void *shadow)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
- trace_mm_filemap_delete_from_page_cache(page);
-
- unaccount_page_cache_page(mapping, page);
- page_cache_delete(mapping, page, shadow);
+ trace_mm_filemap_delete_from_page_cache(folio);
+ filemap_unaccount_folio(mapping, folio);
+ page_cache_delete(mapping, folio, shadow);
}
-static void page_cache_free_page(struct address_space *mapping,
- struct page *page)
+void filemap_free_folio(struct address_space *mapping, struct folio *folio)
{
void (*freepage)(struct page *);
+ int refs = 1;
freepage = mapping->a_ops->freepage;
if (freepage)
- freepage(page);
+ freepage(&folio->page);
- if (PageTransHuge(page) && !PageHuge(page)) {
- page_ref_sub(page, thp_nr_pages(page));
- VM_BUG_ON_PAGE(page_count(page) <= 0, page);
- } else {
- put_page(page);
- }
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ refs = folio_nr_pages(folio);
+ folio_put_refs(folio, refs);
}
/**
- * delete_from_page_cache - delete page from page cache
- * @page: the page which the kernel is trying to remove from page cache
+ * filemap_remove_folio - Remove folio from page cache.
+ * @folio: The folio.
*
- * This must be called only on pages that have been verified to be in the page
- * cache and locked. It will never put the page into the free list, the caller
- * has a reference on the page.
+ * This must be called only on folios that are locked and have been
+ * verified to be in the page cache. It will never put the folio into
+ * the free list because the caller has a reference on the page.
*/
-void delete_from_page_cache(struct page *page)
+void filemap_remove_folio(struct folio *folio)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio->mapping;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
- __delete_from_page_cache(page, NULL);
+ __filemap_remove_folio(folio, NULL);
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
- page_cache_free_page(mapping, page);
+ filemap_free_folio(mapping, folio);
}
-EXPORT_SYMBOL(delete_from_page_cache);
/*
- * page_cache_delete_batch - delete several pages from page cache
- * @mapping: the mapping to which pages belong
- * @pvec: pagevec with pages to delete
+ * page_cache_delete_batch - delete several folios from page cache
+ * @mapping: the mapping to which folios belong
+ * @fbatch: batch of folios to delete
*
- * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
- * It tolerates holes in @pvec (mapping entries at those indices are not
- * modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * The function walks over mapping->i_pages and removes folios passed in
+ * @fbatch from the mapping. The function expects @fbatch to be sorted
+ * by page index and is optimised for it to be dense.
+ * It tolerates holes in @fbatch (mapping entries at those indices are not
+ * modified).
*
* The function expects the i_pages lock to be held.
*/
static void page_cache_delete_batch(struct address_space *mapping,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
- XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
- int total_pages = 0;
+ XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index);
+ long total_pages = 0;
int i = 0;
- struct page *page;
+ struct folio *folio;
mapping_set_update(&xas, mapping);
- xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec))
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ if (i >= folio_batch_count(fbatch))
break;
/* A swap/dax/shadow entry got inserted? Skip it. */
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
/*
* A page got inserted in our range? Skip it. We have our
@@ -310,54 +297,48 @@ static void page_cache_delete_batch(struct address_space *mapping,
* means our page has been removed, which shouldn't be
* possible because we're holding the PageLock.
*/
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
- page);
+ if (folio != fbatch->folios[i]) {
+ VM_BUG_ON_FOLIO(folio->index >
+ fbatch->folios[i]->index, folio);
continue;
}
- WARN_ON_ONCE(!PageLocked(page));
+ WARN_ON_ONCE(!folio_test_locked(folio));
- if (page->index == xas.xa_index)
- page->mapping = NULL;
- /* Leave page->index set: truncation lookup relies on it */
+ folio->mapping = NULL;
+ /* Leave folio->index set: truncation lookup relies on it */
- /*
- * Move to the next page in the vector if this is a regular
- * page or the index is of the last sub-page of this compound
- * page.
- */
- if (page->index + compound_nr(page) - 1 == xas.xa_index)
- i++;
+ i++;
xas_store(&xas, NULL);
- total_pages++;
+ total_pages += folio_nr_pages(folio);
}
mapping->nrpages -= total_pages;
}
void delete_from_page_cache_batch(struct address_space *mapping,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
int i;
- if (!pagevec_count(pvec))
+ if (!folio_batch_count(fbatch))
return;
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
- for (i = 0; i < pagevec_count(pvec); i++) {
- trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+ for (i = 0; i < folio_batch_count(fbatch); i++) {
+ struct folio *folio = fbatch->folios[i];
- unaccount_page_cache_page(mapping, pvec->pages[i]);
+ trace_mm_filemap_delete_from_page_cache(folio);
+ filemap_unaccount_folio(mapping, folio);
}
- page_cache_delete_batch(mapping, pvec);
+ page_cache_delete_batch(mapping, fbatch);
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
- for (i = 0; i < pagevec_count(pvec); i++)
- page_cache_free_page(mapping, pvec->pages[i]);
+ for (i = 0; i < folio_batch_count(fbatch); i++)
+ filemap_free_folio(mapping, fbatch->folios[i]);
}
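
For context on the new folio_batch interface, here is a rough sketch of the caller-side loop it expects, loosely modelled on the truncation path; names, locking and the handling of value entries are simplified and not taken from this patch.

static void example_truncate_range(struct address_space *mapping,
				   pgoff_t start, pgoff_t end)
{
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index = start;
	unsigned int i;

	folio_batch_init(&fbatch);
	while (index <= end &&
	       find_lock_entries(mapping, index, end, &fbatch, indices)) {
		/* Advance before the batch is consumed. */
		index = indices[folio_batch_count(&fbatch) - 1] + 1;

		/*
		 * A real caller must first strip value entries (shadow, swap,
		 * DAX) out of the batch; only real, locked folios may be
		 * passed to delete_from_page_cache_batch().
		 */
		delete_from_page_cache_batch(mapping, &fbatch);
		for (i = 0; i < folio_batch_count(&fbatch); i++)
			folio_unlock(fbatch.folios[i]);
		folio_batch_release(&fbatch);	/* drops the lookup references */
		cond_resched();
	}
}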
int filemap_check_errors(struct address_space *mapping)
@@ -646,8 +627,8 @@ static bool mapping_needs_writeback(struct address_space *mapping)
return mapping->nrpages;
}
-static bool filemap_range_has_writeback(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte)
+bool filemap_range_has_writeback(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
{
XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
pgoff_t max = end_byte >> PAGE_SHIFT;
@@ -667,34 +648,8 @@ static bool filemap_range_has_writeback(struct address_space *mapping,
}
rcu_read_unlock();
return page != NULL;
-
-}
-
-/**
- * filemap_range_needs_writeback - check if range potentially needs writeback
- * @mapping: address space within which to check
- * @start_byte: offset in bytes where the range starts
- * @end_byte: offset in bytes where the range ends (inclusive)
- *
- * Find at least one page in the range supplied, usually used to check if
- * direct writing in this range will trigger a writeback. Used by O_DIRECT
- * read/write with IOCB_NOWAIT, to see if the caller needs to do
- * filemap_write_and_wait_range() before proceeding.
- *
- * Return: %true if the caller should do filemap_write_and_wait_range() before
- * doing O_DIRECT to a page in this range, %false otherwise.
- */
-bool filemap_range_needs_writeback(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte)
-{
- if (!mapping_needs_writeback(mapping))
- return false;
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
- !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
- return false;
- return filemap_range_has_writeback(mapping, start_byte, end_byte);
}
-EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+EXPORT_SYMBOL_GPL(filemap_range_has_writeback);
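
The filemap_range_needs_writeback() wrapper removed above is not lost: with filemap_range_has_writeback() now non-static and exported, the wrapper can become a static inline in a header. A sketch of the equivalent inline, reconstructed from the deleted lines, follows; the destination header and the open-coded nrpages check (standing in for the file-local mapping_needs_writeback()) are assumptions.

static inline bool filemap_range_needs_writeback(struct address_space *mapping,
						 loff_t start_byte,
						 loff_t end_byte)
{
	if (!mapping->nrpages)
		return false;
	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
	    !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
		return false;
	return filemap_range_has_writeback(mapping, start_byte, end_byte);
}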
/**
* filemap_write_and_wait_range - write out & wait on a file range
@@ -891,26 +846,27 @@ noinline int __filemap_add_folio(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, index);
int huge = folio_test_hugetlb(folio);
- int error;
bool charged = false;
+ long nr = 1;
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
mapping_set_update(&xas, mapping);
- folio_get(folio);
- folio->mapping = mapping;
- folio->index = index;
-
if (!huge) {
- error = mem_cgroup_charge(folio, NULL, gfp);
+ int error = mem_cgroup_charge(folio, NULL, gfp);
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
if (error)
- goto error;
+ return error;
charged = true;
+ xas_set_order(&xas, index, folio_order(folio));
+ nr = folio_nr_pages(folio);
}
gfp &= GFP_RECLAIM_MASK;
+ folio_ref_add(folio, nr);
+ folio->mapping = mapping;
+ folio->index = xas.xa_index;
do {
unsigned int order = xa_get_order(xas.xa, xas.xa_index);
@@ -934,6 +890,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
/* entry may have been split before we acquired lock */
order = xa_get_order(xas.xa, xas.xa_index);
if (order > folio_order(folio)) {
+ /* How to handle large swap entries? */
+ BUG_ON(shmem_mapping(mapping));
xas_split(&xas, old, order);
xas_reset(&xas);
}
@@ -943,29 +901,31 @@ noinline int __filemap_add_folio(struct address_space *mapping,
if (xas_error(&xas))
goto unlock;
- mapping->nrpages++;
+ mapping->nrpages += nr;
/* hugetlb pages do not participate in page cache accounting */
- if (!huge)
- __lruvec_stat_add_folio(folio, NR_FILE_PAGES);
+ if (!huge) {
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, nr);
+ }
unlock:
xas_unlock_irq(&xas);
} while (xas_nomem(&xas, gfp));
- if (xas_error(&xas)) {
- error = xas_error(&xas);
- if (charged)
- mem_cgroup_uncharge(folio);
+ if (xas_error(&xas))
goto error;
- }
- trace_mm_filemap_add_to_page_cache(&folio->page);
+ trace_mm_filemap_add_to_page_cache(folio);
return 0;
error:
+ if (charged)
+ mem_cgroup_uncharge(folio);
folio->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
- folio_put(folio);
- return error;
+ folio_put_refs(folio, nr);
+ return xas_error(&xas);
}
ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
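
With the reference and accounting changes above, __filemap_add_folio() takes folio_nr_pages() references and bumps nrpages by the same amount, so a multi-page folio can be inserted in one call. A minimal caller sketch follows, in the spirit of filemap_create_folio() further down; the order-2 allocation, the helper name and the assumption that the mapping supports large folios are all illustrative.

static struct folio *example_add_order2_folio(struct address_space *mapping,
					      pgoff_t index, gfp_t gfp)
{
	struct folio *folio;
	int err;

	folio = filemap_alloc_folio(gfp, 2);	/* four pages */
	if (!folio)
		return ERR_PTR(-ENOMEM);

	/* Index must be aligned to the folio size, per the VM_BUG_ON above. */
	err = filemap_add_folio(mapping, folio, round_down(index, 4), gfp);
	if (err) {
		folio_put(folio);
		return ERR_PTR(err);
	}
	/* Returned locked and on the LRU, holding 4 cache refs plus ours. */
	return folio;
}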
@@ -1103,6 +1063,12 @@ void __init pagecache_init(void)
init_waitqueue_head(&folio_wait_table[i]);
page_writeback_init();
+
+ /*
+ * tmpfs uses the ZERO_PAGE for reading holes: it is up-to-date,
+ * and splice's page_cache_pipe_buf_confirm() needs to see that.
+ */
+ SetPageUptodate(ZERO_PAGE(0));
}
/*
@@ -1223,24 +1189,17 @@ static void folio_wake_bit(struct folio *folio, int bit_nr)
}
/*
- * It is possible for other pages to have collided on the waitqueue
- * hash, so in that case check for a page match. That prevents a long-
- * term waiter
+ * It's possible to miss clearing waiters here, when we woke our page
+ * waiters, but the hashed waitqueue has waiters for other pages on it.
+ * That's okay, it's a rare case. The next waker will clear it.
*
- * It is still possible to miss a case here, when we woke page waiters
- * and removed them from the waitqueue, but there are still other
- * page waiters.
+ * Note that, depending on the page pool (buddy, hugetlb, ZONE_DEVICE,
+ * other), the flag may be cleared in the course of freeing the page;
+ * but that is not required for correctness.
*/
- if (!waitqueue_active(q) || !key.page_match) {
+ if (!waitqueue_active(q) || !key.page_match)
folio_clear_waiters(folio);
- /*
- * It's possible to miss clearing Waiters here, when we woke
- * our page waiters, but the hashed waitqueue has waiters for
- * other pages on it.
- *
- * That's okay, it's a rare case. The next waker will clear it.
- */
- }
+
spin_unlock_irqrestore(&q->lock, flags);
}
@@ -1259,10 +1218,10 @@ enum behavior {
* __folio_lock() waiting on then setting PG_locked.
*/
SHARED, /* Hold ref to page and check the bit when woken, like
- * wait_on_page_writeback() waiting on PG_writeback.
+ * folio_wait_writeback() waiting on PG_writeback.
*/
DROP, /* Drop ref to page before wait, no check when woken,
- * like put_and_wait_on_page_locked() on PG_locked.
+ * like folio_put_wait_locked() on PG_locked.
*/
};
@@ -1426,6 +1385,95 @@ repeat:
return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
}
+#ifdef CONFIG_MIGRATION
+/**
+ * migration_entry_wait_on_locked - Wait for a migration entry to be removed
+ * @entry: migration swap entry.
+ * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required
+ * for pte entries, pass NULL for pmd entries.
+ * @ptl: already locked ptl. This function will drop the lock.
+ *
+ * Wait for a migration entry referencing the given page to be removed. This is
+ * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except
+ * this can be called without taking a reference on the page. Instead this
+ * should be called while holding the ptl for the migration entry referencing
+ * the page.
+ *
+ * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock().
+ *
+ * This follows the same logic as folio_wait_bit_common() so see the comments
+ * there.
+ */
+void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep,
+ spinlock_t *ptl)
+{
+ struct wait_page_queue wait_page;
+ wait_queue_entry_t *wait = &wait_page.wait;
+ bool thrashing = false;
+ bool delayacct = false;
+ unsigned long pflags;
+ wait_queue_head_t *q;
+ struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+
+ q = folio_waitqueue(folio);
+ if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+ if (!folio_test_swapbacked(folio)) {
+ delayacct_thrashing_start();
+ delayacct = true;
+ }
+ psi_memstall_enter(&pflags);
+ thrashing = true;
+ }
+
+ init_wait(wait);
+ wait->func = wake_page_function;
+ wait_page.folio = folio;
+ wait_page.bit_nr = PG_locked;
+ wait->flags = 0;
+
+ spin_lock_irq(&q->lock);
+ folio_set_waiters(folio);
+ if (!folio_trylock_flag(folio, PG_locked, wait))
+ __add_wait_queue_entry_tail(q, wait);
+ spin_unlock_irq(&q->lock);
+
+ /*
+ * If a migration entry exists for the page the migration path must hold
+ * a valid reference to the page, and it must take the ptl to remove the
+ * migration entry. So the page is valid until the ptl is dropped.
+ */
+ if (ptep)
+ pte_unmap_unlock(ptep, ptl);
+ else
+ spin_unlock(ptl);
+
+ for (;;) {
+ unsigned int flags;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ /* Loop until we've been woken or interrupted */
+ flags = smp_load_acquire(&wait->flags);
+ if (!(flags & WQ_FLAG_WOKEN)) {
+ if (signal_pending_state(TASK_UNINTERRUPTIBLE, current))
+ break;
+
+ io_schedule();
+ continue;
+ }
+ break;
+ }
+
+ finish_wait(q, wait);
+
+ if (thrashing) {
+ if (delayacct)
+ delayacct_thrashing_end();
+ psi_memstall_leave(&pflags);
+ }
+}
+#endif
+
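
For reference, the expected caller pattern: the migration path discovers a migration entry while holding the page table lock and, rather than taking a folio reference, hands the still-locked ptl to this helper. A rough sketch of a pte-side caller, simplified from what mm/migrate.c does (local names and the lookup sequence are illustrative):

static void example_wait_for_migration(struct mm_struct *mm, pmd_t *pmd,
				       unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	swp_entry_t entry;

	spin_lock(ptl);
	if (!is_swap_pte(*ptep))
		goto out;
	entry = pte_to_swp_entry(*ptep);
	if (!is_migration_entry(entry))
		goto out;

	/* Unmaps ptep and drops ptl internally, then sleeps until unlocked. */
	migration_entry_wait_on_locked(entry, ptep, ptl);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}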
void folio_wait_bit(struct folio *folio, int bit_nr)
{
folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
@@ -1439,22 +1487,21 @@ int folio_wait_bit_killable(struct folio *folio, int bit_nr)
EXPORT_SYMBOL(folio_wait_bit_killable);
/**
- * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
- * @page: The page to wait for.
+ * folio_put_wait_locked - Drop a reference and wait for it to be unlocked
+ * @folio: The folio to wait for.
* @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc).
*
- * The caller should hold a reference on @page. They expect the page to
+ * The caller should hold a reference on @folio. They expect the page to
* become unlocked relatively soon, but do not wish to hold up migration
- * (for example) by holding the reference while waiting for the page to
+ * (for example) by holding the reference while waiting for the folio to
* come unlocked. After this function returns, the caller should not
- * dereference @page.
+ * dereference @folio.
*
- * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal.
+ * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal.
*/
-int put_and_wait_on_page_locked(struct page *page, int state)
+int folio_put_wait_locked(struct folio *folio, int state)
{
- return folio_wait_bit_common(page_folio(page), PG_locked, state,
- DROP);
+ return folio_wait_bit_common(folio, PG_locked, state, DROP);
}
/**
@@ -1979,37 +2026,36 @@ no_page:
}
EXPORT_SYMBOL(__filemap_get_folio);
-static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
+static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
xa_mark_t mark)
{
- struct page *page;
+ struct folio *folio;
retry:
if (mark == XA_PRESENT)
- page = xas_find(xas, max);
+ folio = xas_find(xas, max);
else
- page = xas_find_marked(xas, max, mark);
+ folio = xas_find_marked(xas, max, mark);
- if (xas_retry(xas, page))
+ if (xas_retry(xas, folio))
goto retry;
/*
* A shadow entry of a recently evicted page, a swap
* entry from shmem/tmpfs or a DAX entry. Return it
* without attempting to raise page count.
*/
- if (!page || xa_is_value(page))
- return page;
+ if (!folio || xa_is_value(folio))
+ return folio;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
goto reset;
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(xas))) {
- put_page(page);
+ if (unlikely(folio != xas_reload(xas))) {
+ folio_put(folio);
goto reset;
}
- return page;
+ return folio;
reset:
xas_reset(xas);
goto retry;
@@ -2020,56 +2066,36 @@ reset:
* @mapping: The address_space to search
* @start: The starting page cache index
* @end: The final page index (inclusive).
- * @pvec: Where the resulting entries are placed.
+ * @fbatch: Where the resulting entries are placed.
* @indices: The cache indices corresponding to the entries in @entries
*
* find_get_entries() will search for and return a batch of entries in
- * the mapping. The entries are placed in @pvec. find_get_entries()
- * takes a reference on any actual pages it returns.
+ * the mapping. The entries are placed in @fbatch. find_get_entries()
+ * takes a reference on any actual folios it returns.
*
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes. There may be holes in the indices due to
- * not-present pages.
+ * The entries have ascending indexes. The indices may not be consecutive
+ * due to not-present entries or large folios.
*
- * Any shadow entries of evicted pages, or swap entries from
+ * Any shadow entries of evicted folios, or swap entries from
* shmem/tmpfs, are included in the returned array.
*
- * If it finds a Transparent Huge Page, head or tail, find_get_entries()
- * stops at that page: the caller is likely to have a better way to handle
- * the compound page as a whole, and then skip its extent, than repeatedly
- * calling find_get_entries() to return all its tails.
- *
- * Return: the number of pages and shadow entries which were found.
+ * Return: The number of entries which were found.
*/
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
- pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+ pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
XA_STATE(xas, &mapping->i_pages, start);
- struct page *page;
- unsigned int ret = 0;
- unsigned nr_entries = PAGEVEC_SIZE;
+ struct folio *folio;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
- /*
- * Terminate early on finding a THP, to allow the caller to
- * handle it all at once; but continue if this is hugetlbfs.
- */
- if (!xa_is_value(page) && PageTransHuge(page) &&
- !PageHuge(page)) {
- page = find_subpage(page, xas.xa_index);
- nr_entries = ret + 1;
- }
-
- indices[ret] = xas.xa_index;
- pvec->pages[ret] = page;
- if (++ret == nr_entries)
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
+ indices[fbatch->nr] = xas.xa_index;
+ if (!folio_batch_add(fbatch, folio))
break;
}
rcu_read_unlock();
- pvec->nr = ret;
- return ret;
+ return folio_batch_count(fbatch);
}
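
Because each large folio now occupies a single batch slot, callers of find_get_entries() must advance their own index and cannot assume consecutive indices. A simplified consumer sketch (locking and the real handling of value entries omitted; not taken from this patch):

static void example_scan_entries(struct address_space *mapping,
				 pgoff_t start, pgoff_t end)
{
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	unsigned int i;

	folio_batch_init(&fbatch);
	while (find_get_entries(mapping, start, end, &fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			start = indices[i] + 1;
			if (xa_is_value(folio))
				continue;	/* shadow/swap/DAX: no reference held */

			/* One slot per folio, however many pages it spans. */
			start = folio->index + folio_nr_pages(folio);
			folio_put(folio);
		}
		folio_batch_init(&fbatch);
	}
}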
/**
@@ -2077,63 +2103,64 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
* @mapping: The address_space to search.
* @start: The starting page cache index.
* @end: The final page index (inclusive).
- * @pvec: Where the resulting entries are placed.
- * @indices: The cache indices of the entries in @pvec.
+ * @fbatch: Where the resulting entries are placed.
+ * @indices: The cache indices of the entries in @fbatch.
*
* find_lock_entries() will return a batch of entries from @mapping.
- * Swap, shadow and DAX entries are included. Pages are returned
- * locked and with an incremented refcount. Pages which are locked by
- * somebody else or under writeback are skipped. Only the head page of
- * a THP is returned. Pages which are partially outside the range are
- * not returned.
+ * Swap, shadow and DAX entries are included. Folios are returned
+ * locked and with an incremented refcount. Folios which are locked
+ * by somebody else or under writeback are skipped. Folios which are
+ * partially outside the range are not returned.
*
* The entries have ascending indexes. The indices may not be consecutive
- * due to not-present entries, THP pages, pages which could not be locked
- * or pages under writeback.
+ * due to not-present entries, large folios, folios which could not be
+ * locked or folios under writeback.
*
* Return: The number of entries which were found.
*/
unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
- pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+ pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
XA_STATE(xas, &mapping->i_pages, start);
- struct page *page;
+ struct folio *folio;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
- if (!xa_is_value(page)) {
- if (page->index < start)
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
+ if (!xa_is_value(folio)) {
+ if (folio->index < start)
goto put;
- if (page->index + thp_nr_pages(page) - 1 > end)
+ if (folio->index + folio_nr_pages(folio) - 1 > end)
goto put;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto put;
- if (page->mapping != mapping || PageWriteback(page))
+ if (folio->mapping != mapping ||
+ folio_test_writeback(folio))
goto unlock;
- VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index),
- page);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
+ folio);
}
- indices[pvec->nr] = xas.xa_index;
- if (!pagevec_add(pvec, page))
+ indices[fbatch->nr] = xas.xa_index;
+ if (!folio_batch_add(fbatch, folio))
break;
- goto next;
+ continue;
unlock:
- unlock_page(page);
+ folio_unlock(folio);
put:
- put_page(page);
-next:
- if (!xa_is_value(page) && PageTransHuge(page)) {
- unsigned int nr_pages = thp_nr_pages(page);
-
- /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */
- xas_set(&xas, page->index + nr_pages);
- if (xas.xa_index < nr_pages)
- break;
- }
+ folio_put(folio);
}
rcu_read_unlock();
- return pagevec_count(pvec);
+ return folio_batch_count(fbatch);
+}
+
+static inline
+bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
+{
+ if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+ return false;
+ if (index >= max)
+ return false;
+ return index < folio->index + folio_nr_pages(folio) - 1;
}
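
folio_more_pages() is what lets the page-based lookups below keep handing out individual subpages of a large folio without stepping past the caller's end index. A worked example with assumed numbers:

/*
 * Worked example (hypothetical numbers): an order-2 folio caches indices
 * 8-11 and the caller asked for end == 10.
 *
 *	folio_more_pages(folio,  8, 10) -> true	(more subpages, index < max)
 *	folio_more_pages(folio,  9, 10) -> true
 *	folio_more_pages(folio, 10, 10) -> false	(index has reached max)
 *
 * So find_get_pages_range() below hands out pages 8, 9 and 10, taking one
 * extra folio reference per additional subpage, and never walks past end.
 */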
/**
@@ -2162,23 +2189,29 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, *start);
- struct page *page;
+ struct folio *folio;
unsigned ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
/* Skip over shadow, swap and DAX entries */
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- pages[ret] = find_subpage(page, xas.xa_index);
+again:
+ pages[ret] = folio_file_page(folio, xas.xa_index);
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
+ if (folio_more_pages(folio, xas.xa_index, end)) {
+ xas.xa_index++;
+ folio_ref_inc(folio);
+ goto again;
+ }
}
/*
@@ -2204,8 +2237,9 @@ out:
* @nr_pages: The maximum number of pages
* @pages: Where the resulting pages are placed
*
- * find_get_pages_contig() works exactly like find_get_pages(), except
- * that the returned number of pages are guaranteed to be contiguous.
+ * find_get_pages_contig() works exactly like find_get_pages_range(),
+ * except that the returned number of pages are guaranteed to be
+ * contiguous.
*
* Return: the number of pages which were found.
*/
@@ -2213,36 +2247,41 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
unsigned int nr_pages, struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, index);
- struct page *page;
+ struct folio *folio;
unsigned int ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- for (page = xas_load(&xas); page; page = xas_next(&xas)) {
- if (xas_retry(&xas, page))
+ for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+ if (xas_retry(&xas, folio))
continue;
/*
* If the entry has been swapped out, we can stop looking.
* No current caller is looking for DAX entries.
*/
- if (xa_is_value(page))
+ if (xa_is_value(folio))
break;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
goto retry;
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(&xas)))
+ if (unlikely(folio != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+again:
+ pages[ret] = folio_file_page(folio, xas.xa_index);
if (++ret == nr_pages)
break;
+ if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) {
+ xas.xa_index++;
+ folio_ref_inc(folio);
+ goto again;
+ }
continue;
put_page:
- put_page(page);
+ folio_put(folio);
retry:
xas_reset(&xas);
}
@@ -2260,9 +2299,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
* @nr_pages: the maximum number of pages
* @pages: where the resulting pages are placed
*
- * Like find_get_pages(), except we only return head pages which are tagged
- * with @tag. @index is updated to the index immediately after the last
- * page we return, ready for the next iteration.
+ * Like find_get_pages_range(), except we only return head pages which are
+ * tagged with @tag. @index is updated to the index immediately after the
+ * last page we return, ready for the next iteration.
*
* Return: the number of pages which were found.
*/
@@ -2271,25 +2310,25 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, *index);
- struct page *page;
+ struct folio *folio;
unsigned ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, tag))) {
+ while ((folio = find_get_entry(&xas, end, tag))) {
/*
* Shadow entries should never be tagged, but this iteration
* is lockless so there is a window for page reclaim to evict
* a page we saw tagged. Skip over it.
*/
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- pages[ret] = page;
+ pages[ret] = &folio->page;
if (++ret == nr_pages) {
- *index = page->index + thp_nr_pages(page);
+ *index = folio->index + folio_nr_pages(folio);
goto out;
}
}
@@ -2332,52 +2371,50 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
}
/*
- * filemap_get_read_batch - Get a batch of pages for read
+ * filemap_get_read_batch - Get a batch of folios for read
*
- * Get a batch of pages which represent a contiguous range of bytes
- * in the file. No tail pages will be returned. If @index is in the
- * middle of a THP, the entire THP will be returned. The last page in
- * the batch may have Readahead set or be not Uptodate so that the
- * caller can take the appropriate action.
+ * Get a batch of folios which represent a contiguous range of bytes in
+ * the file. No exceptional entries will be returned. If @index is in
+ * the middle of a folio, the entire folio will be returned. The last
+ * folio in the batch may have the readahead flag set or the uptodate flag
+ * clear so that the caller can take the appropriate action.
*/
static void filemap_get_read_batch(struct address_space *mapping,
- pgoff_t index, pgoff_t max, struct pagevec *pvec)
+ pgoff_t index, pgoff_t max, struct folio_batch *fbatch)
{
XA_STATE(xas, &mapping->i_pages, index);
- struct page *head;
+ struct folio *folio;
rcu_read_lock();
- for (head = xas_load(&xas); head; head = xas_next(&xas)) {
- if (xas_retry(&xas, head))
+ for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+ if (xas_retry(&xas, folio))
continue;
- if (xas.xa_index > max || xa_is_value(head))
+ if (xas.xa_index > max || xa_is_value(folio))
break;
- if (!page_cache_get_speculative(head))
+ if (!folio_try_get_rcu(folio))
goto retry;
- /* Has the page moved or been split? */
- if (unlikely(head != xas_reload(&xas)))
- goto put_page;
+ if (unlikely(folio != xas_reload(&xas)))
+ goto put_folio;
- if (!pagevec_add(pvec, head))
+ if (!folio_batch_add(fbatch, folio))
break;
- if (!PageUptodate(head))
+ if (!folio_test_uptodate(folio))
break;
- if (PageReadahead(head))
+ if (folio_test_readahead(folio))
break;
- xas.xa_index = head->index + thp_nr_pages(head) - 1;
- xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
+ xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1);
continue;
-put_page:
- put_page(head);
+put_folio:
+ folio_put(folio);
retry:
xas_reset(&xas);
}
rcu_read_unlock();
}
-static int filemap_read_page(struct file *file, struct address_space *mapping,
- struct page *page)
+static int filemap_read_folio(struct file *file, struct address_space *mapping,
+ struct folio *folio)
{
int error;
@@ -2386,52 +2423,51 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
* eg. multipath errors. PG_error will be set again if readpage
* fails.
*/
- ClearPageError(page);
+ folio_clear_error(folio);
/* Start the actual read. The read will unlock the page. */
- error = mapping->a_ops->readpage(file, page);
+ error = mapping->a_ops->readpage(file, &folio->page);
if (error)
return error;
- error = wait_on_page_locked_killable(page);
+ error = folio_wait_locked_killable(folio);
if (error)
return error;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
return 0;
shrink_readahead_size_eio(&file->f_ra);
return -EIO;
}
static bool filemap_range_uptodate(struct address_space *mapping,
- loff_t pos, struct iov_iter *iter, struct page *page)
+ loff_t pos, struct iov_iter *iter, struct folio *folio)
{
int count;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
return true;
/* pipes can't handle partially uptodate pages */
if (iov_iter_is_pipe(iter))
return false;
if (!mapping->a_ops->is_partially_uptodate)
return false;
- if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page)))
+ if (mapping->host->i_blkbits >= folio_shift(folio))
return false;
count = iter->count;
- if (page_offset(page) > pos) {
- count -= page_offset(page) - pos;
+ if (folio_pos(folio) > pos) {
+ count -= folio_pos(folio) - pos;
pos = 0;
} else {
- pos -= page_offset(page);
+ pos -= folio_pos(folio);
}
- return mapping->a_ops->is_partially_uptodate(page, pos, count);
+ return mapping->a_ops->is_partially_uptodate(folio, pos, count);
}
static int filemap_update_page(struct kiocb *iocb,
struct address_space *mapping, struct iov_iter *iter,
- struct page *page)
+ struct folio *folio)
{
- struct folio *folio = page_folio(page);
int error;
if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -2447,7 +2483,11 @@ static int filemap_update_page(struct kiocb *iocb,
goto unlock_mapping;
if (!(iocb->ki_flags & IOCB_WAITQ)) {
filemap_invalidate_unlock_shared(mapping);
- put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE);
+ /*
+ * This is where we usually end up waiting for a
+ * previously submitted readahead to finish.
+ */
+ folio_put_wait_locked(folio, TASK_KILLABLE);
return AOP_TRUNCATED_PAGE;
}
error = __folio_lock_async(folio, iocb->ki_waitq);
@@ -2460,14 +2500,14 @@ static int filemap_update_page(struct kiocb *iocb,
goto unlock;
error = 0;
- if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page))
+ if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio))
goto unlock;
error = -EAGAIN;
if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
goto unlock;
- error = filemap_read_page(iocb->ki_filp, mapping, &folio->page);
+ error = filemap_read_folio(iocb->ki_filp, mapping, folio);
goto unlock_mapping;
unlock:
folio_unlock(folio);
@@ -2478,70 +2518,72 @@ unlock_mapping:
return error;
}
-static int filemap_create_page(struct file *file,
+static int filemap_create_folio(struct file *file,
struct address_space *mapping, pgoff_t index,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
- struct page *page;
+ struct folio *folio;
int error;
- page = page_cache_alloc(mapping);
- if (!page)
+ folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+ if (!folio)
return -ENOMEM;
/*
- * Protect against truncate / hole punch. Grabbing invalidate_lock here
- * assures we cannot instantiate and bring uptodate new pagecache pages
- * after evicting page cache during truncate and before actually
- * freeing blocks. Note that we could release invalidate_lock after
- * inserting the page into page cache as the locked page would then be
- * enough to synchronize with hole punching. But there are code paths
- * such as filemap_update_page() filling in partially uptodate pages or
- * ->readpages() that need to hold invalidate_lock while mapping blocks
- * for IO so let's hold the lock here as well to keep locking rules
- * simple.
+ * Protect against truncate / hole punch. Grabbing invalidate_lock
+ * here assures we cannot instantiate and bring uptodate new
+ * pagecache folios after evicting page cache during truncate
+ * and before actually freeing blocks. Note that we could
+ * release invalidate_lock after inserting the folio into
+ * the page cache as the locked folio would then be enough to
+ * synchronize with hole punching. But there are code paths
+ * such as filemap_update_page() filling in partially uptodate
+ * pages or ->readpages() that need to hold invalidate_lock
+ * while mapping blocks for IO so let's hold the lock here as
+ * well to keep locking rules simple.
*/
filemap_invalidate_lock_shared(mapping);
- error = add_to_page_cache_lru(page, mapping, index,
+ error = filemap_add_folio(mapping, folio, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error == -EEXIST)
error = AOP_TRUNCATED_PAGE;
if (error)
goto error;
- error = filemap_read_page(file, mapping, page);
+ error = filemap_read_folio(file, mapping, folio);
if (error)
goto error;
filemap_invalidate_unlock_shared(mapping);
- pagevec_add(pvec, page);
+ folio_batch_add(fbatch, folio);
return 0;
error:
filemap_invalidate_unlock_shared(mapping);
- put_page(page);
+ folio_put(folio);
return error;
}
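
The invalidate_lock comment above describes exclusion against truncation and hole punching. For orientation, a minimal sketch of the writer side it synchronizes with; the helper name and the exact filesystem sequence are assumptions:

static void example_punch_hole(struct inode *inode, loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;

	/* Exclusive: blocks the shared holders in the read path above. */
	filemap_invalidate_lock(mapping);
	truncate_pagecache_range(inode, start, end);
	/* ... the filesystem frees the underlying blocks here ... */
	filemap_invalidate_unlock(mapping);
}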
static int filemap_readahead(struct kiocb *iocb, struct file *file,
- struct address_space *mapping, struct page *page,
+ struct address_space *mapping, struct folio *folio,
pgoff_t last_index)
{
+ DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);
+
if (iocb->ki_flags & IOCB_NOIO)
return -EAGAIN;
- page_cache_async_readahead(mapping, &file->f_ra, file, page,
- page->index, last_index - page->index);
+ page_cache_async_ra(&ractl, folio, last_index - folio->index);
return 0;
}
static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
struct file *filp = iocb->ki_filp;
struct address_space *mapping = filp->f_mapping;
struct file_ra_state *ra = &filp->f_ra;
pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
pgoff_t last_index;
- struct page *page;
+ struct folio *folio;
int err = 0;
last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
@@ -2549,34 +2591,35 @@ retry:
if (fatal_signal_pending(current))
return -EINTR;
- filemap_get_read_batch(mapping, index, last_index, pvec);
- if (!pagevec_count(pvec)) {
+ filemap_get_read_batch(mapping, index, last_index, fbatch);
+ if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & IOCB_NOIO)
return -EAGAIN;
page_cache_sync_readahead(mapping, ra, filp, index,
last_index - index);
- filemap_get_read_batch(mapping, index, last_index, pvec);
+ filemap_get_read_batch(mapping, index, last_index, fbatch);
}
- if (!pagevec_count(pvec)) {
+ if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
return -EAGAIN;
- err = filemap_create_page(filp, mapping,
- iocb->ki_pos >> PAGE_SHIFT, pvec);
+ err = filemap_create_folio(filp, mapping,
+ iocb->ki_pos >> PAGE_SHIFT, fbatch);
if (err == AOP_TRUNCATED_PAGE)
goto retry;
return err;
}
- page = pvec->pages[pagevec_count(pvec) - 1];
- if (PageReadahead(page)) {
- err = filemap_readahead(iocb, filp, mapping, page, last_index);
+ folio = fbatch->folios[folio_batch_count(fbatch) - 1];
+ if (folio_test_readahead(folio)) {
+ err = filemap_readahead(iocb, filp, mapping, folio, last_index);
if (err)
goto err;
}
- if (!PageUptodate(page)) {
- if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1)
+ if (!folio_test_uptodate(folio)) {
+ if ((iocb->ki_flags & IOCB_WAITQ) &&
+ folio_batch_count(fbatch) > 1)
iocb->ki_flags |= IOCB_NOWAIT;
- err = filemap_update_page(iocb, mapping, iter, page);
+ err = filemap_update_page(iocb, mapping, iter, folio);
if (err)
goto err;
}
@@ -2584,8 +2627,8 @@ retry:
return 0;
err:
if (err < 0)
- put_page(page);
- if (likely(--pvec->nr))
+ folio_put(folio);
+ if (likely(--fbatch->nr))
return 0;
if (err == AOP_TRUNCATED_PAGE)
goto retry;
@@ -2612,7 +2655,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
struct file_ra_state *ra = &filp->f_ra;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- struct pagevec pvec;
+ struct folio_batch fbatch;
int i, error = 0;
bool writably_mapped;
loff_t isize, end_offset;
@@ -2623,7 +2666,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
do {
cond_resched();
@@ -2639,7 +2682,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
break;
- error = filemap_get_pages(iocb, iter, &pvec);
+ error = filemap_get_pages(iocb, iter, &fbatch);
if (error < 0)
break;
@@ -2653,7 +2696,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
*/
isize = i_size_read(inode);
if (unlikely(iocb->ki_pos >= isize))
- goto put_pages;
+ goto put_folios;
end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
/*
@@ -2668,33 +2711,29 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
*/
if (iocb->ki_pos >> PAGE_SHIFT !=
ra->prev_pos >> PAGE_SHIFT)
- mark_page_accessed(pvec.pages[0]);
+ folio_mark_accessed(fbatch.folios[0]);
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- size_t page_size = thp_size(page);
- size_t offset = iocb->ki_pos & (page_size - 1);
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ size_t fsize = folio_size(folio);
+ size_t offset = iocb->ki_pos & (fsize - 1);
size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
- page_size - offset);
+ fsize - offset);
size_t copied;
- if (end_offset < page_offset(page))
+ if (end_offset < folio_pos(folio))
break;
if (i > 0)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
/*
- * If users can be writing to this page using arbitrary
- * virtual addresses, take care about potential aliasing
- * before reading the page on the kernel side.
+ * If users can be writing to this folio using arbitrary
+ * virtual addresses, take care of potential aliasing
+ * before reading the folio on the kernel side.
*/
- if (writably_mapped) {
- int j;
-
- for (j = 0; j < thp_nr_pages(page); j++)
- flush_dcache_page(page + j);
- }
+ if (writably_mapped)
+ flush_dcache_folio(folio);
- copied = copy_page_to_iter(page, offset, bytes, iter);
+ copied = copy_folio_to_iter(folio, offset, bytes, iter);
already_read += copied;
iocb->ki_pos += copied;
@@ -2705,10 +2744,10 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
break;
}
}
-put_pages:
- for (i = 0; i < pagevec_count(&pvec); i++)
- put_page(pvec.pages[i]);
- pagevec_reinit(&pvec);
+put_folios:
+ for (i = 0; i < folio_batch_count(&fbatch); i++)
+ folio_put(fbatch.folios[i]);
+ folio_batch_init(&fbatch);
} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
file_accessed(filp);
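
Because a batch entry may now be a multi-page folio, the offset/bytes arithmetic in the copy loop above operates on the whole folio at once. A worked example with assumed sizes:

/*
 * Worked example (sizes are hypothetical): a 16-page (64KiB) folio starts
 * at file position 0x10000 and iocb->ki_pos == 0x13800.
 *
 *	fsize  = folio_size(folio)		= 0x10000
 *	offset = iocb->ki_pos & (fsize - 1)	= 0x3800
 *	bytes  = min(end_offset - ki_pos, fsize - offset) <= 0xc800 (50KiB)
 *
 * One copy_folio_to_iter() call can therefore move up to 50KiB, and the old
 * per-subpage flush_dcache_page() loop collapses into one flush_dcache_folio().
 */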
@@ -2793,44 +2832,44 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
EXPORT_SYMBOL(generic_file_read_iter);
-static inline loff_t page_seek_hole_data(struct xa_state *xas,
- struct address_space *mapping, struct page *page,
+static inline loff_t folio_seek_hole_data(struct xa_state *xas,
+ struct address_space *mapping, struct folio *folio,
loff_t start, loff_t end, bool seek_data)
{
const struct address_space_operations *ops = mapping->a_ops;
size_t offset, bsz = i_blocksize(mapping->host);
- if (xa_is_value(page) || PageUptodate(page))
+ if (xa_is_value(folio) || folio_test_uptodate(folio))
return seek_data ? start : end;
if (!ops->is_partially_uptodate)
return seek_data ? end : start;
xas_pause(xas);
rcu_read_unlock();
- lock_page(page);
- if (unlikely(page->mapping != mapping))
+ folio_lock(folio);
+ if (unlikely(folio->mapping != mapping))
goto unlock;
- offset = offset_in_thp(page, start) & ~(bsz - 1);
+ offset = offset_in_folio(folio, start) & ~(bsz - 1);
do {
- if (ops->is_partially_uptodate(page, offset, bsz) == seek_data)
+ if (ops->is_partially_uptodate(folio, offset, bsz) ==
+ seek_data)
break;
start = (start + bsz) & ~(bsz - 1);
offset += bsz;
- } while (offset < thp_size(page));
+ } while (offset < folio_size(folio));
unlock:
- unlock_page(page);
+ folio_unlock(folio);
rcu_read_lock();
return start;
}
-static inline
-unsigned int seek_page_size(struct xa_state *xas, struct page *page)
+static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio)
{
- if (xa_is_value(page))
+ if (xa_is_value(folio))
return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
- return thp_size(page);
+ return folio_size(folio);
}
/**
@@ -2857,15 +2896,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
pgoff_t max = (end - 1) >> PAGE_SHIFT;
bool seek_data = (whence == SEEK_DATA);
- struct page *page;
+ struct folio *folio;
if (end <= start)
return -ENXIO;
rcu_read_lock();
- while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
+ while ((folio = find_get_entry(&xas, max, XA_PRESENT))) {
loff_t pos = (u64)xas.xa_index << PAGE_SHIFT;
- unsigned int seek_size;
+ size_t seek_size;
if (start < pos) {
if (!seek_data)
@@ -2873,9 +2912,9 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
start = pos;
}
- seek_size = seek_page_size(&xas, page);
- pos = round_up(pos + 1, seek_size);
- start = page_seek_hole_data(&xas, mapping, page, start, pos,
+ seek_size = seek_folio_size(&xas, folio);
+ pos = round_up((u64)pos + 1, seek_size);
+ start = folio_seek_hole_data(&xas, mapping, folio, start, pos,
seek_data);
if (start < pos)
goto unlock;
@@ -2883,15 +2922,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
break;
if (seek_size > PAGE_SIZE)
xas_set(&xas, pos >> PAGE_SHIFT);
- if (!xa_is_value(page))
- put_page(page);
+ if (!xa_is_value(folio))
+ folio_put(folio);
}
if (seek_data)
start = -ENXIO;
unlock:
rcu_read_unlock();
- if (page && !xa_is_value(page))
- put_page(page);
+ if (folio && !xa_is_value(folio))
+ folio_put(folio);
if (start > end)
return end;
return start;
@@ -2900,21 +2939,20 @@ unlock:
#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS (100)
/*
- * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
+ * lock_folio_maybe_drop_mmap - lock the folio, possibly dropping the mmap_lock
* @vmf - the vm_fault for this fault.
- * @page - the page to lock.
+ * @folio - the folio to lock.
* @fpin - the pointer to the file we may pin (or is already pinned).
*
- * This works similar to lock_page_or_retry in that it can drop the mmap_lock.
- * It differs in that it actually returns the page locked if it returns 1 and 0
- * if it couldn't lock the page. If we did have to drop the mmap_lock then fpin
- * will point to the pinned file and needs to be fput()'ed at a later point.
+ * This works similar to lock_folio_or_retry in that it can drop the
+ * mmap_lock. It differs in that it actually returns the folio locked
+ * if it returns 1 and 0 if it couldn't lock the folio. If we did have
+ * to drop the mmap_lock then fpin will point to the pinned file and
+ * needs to be fput()'ed at a later point.
*/
-static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
struct file **fpin)
{
- struct folio *folio = page_folio(page);
-
if (folio_trylock(folio))
return 1;
@@ -2961,6 +2999,24 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct file *fpin = NULL;
unsigned int mmap_miss;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /* Use the readahead code, even if readahead is disabled */
+ if (vmf->vma->vm_flags & VM_HUGEPAGE) {
+ fpin = maybe_unlock_mmap_for_io(vmf, fpin);
+ ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
+ ra->size = HPAGE_PMD_NR;
+ /*
+ * Fetch two PMD folios, so we get the chance to actually
+ * readahead, unless we've been told not to.
+ */
+ if (!(vmf->vma->vm_flags & VM_RAND_READ))
+ ra->size *= 2;
+ ra->async_size = HPAGE_PMD_NR;
+ page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
+ return fpin;
+ }
+#endif
+
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ)
return fpin;
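
A sizing sketch for the VM_HUGEPAGE path above, assuming 4KiB base pages so that HPAGE_PMD_NR is 512 and HPAGE_PMD_ORDER is 9 (numbers chosen for illustration):

/*
 * A fault at page index 1234 rounds ractl._index down to 1024, then:
 *
 *	ra->size       = 512		(one PMD worth of pages)
 *	ra->size      *= 2  -> 1024	(unless VM_RAND_READ is set)
 *	ra->async_size = 512
 *
 * so page_cache_ra_order() attempts a 4MiB read as order-9 folios and marks
 * the trailing 2MiB for asynchronous readahead.
 */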
@@ -2993,7 +3049,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
ra->size = ra->ra_pages;
ra->async_size = ra->ra_pages / 4;
ractl._index = ra->start;
- do_page_cache_ra(&ractl, ra->size, ra->async_size);
+ page_cache_ra_order(&ractl, ra, 0);
return fpin;
}
@@ -3003,25 +3059,25 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
* was pinned if we have to drop the mmap_lock in order to do IO.
*/
static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
- struct page *page)
+ struct folio *folio)
{
struct file *file = vmf->vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
- struct address_space *mapping = file->f_mapping;
+ DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff);
struct file *fpin = NULL;
unsigned int mmap_miss;
- pgoff_t offset = vmf->pgoff;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
return fpin;
+
mmap_miss = READ_ONCE(ra->mmap_miss);
if (mmap_miss)
WRITE_ONCE(ra->mmap_miss, --mmap_miss);
- if (PageReadahead(page)) {
+
+ if (folio_test_readahead(folio)) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_async_readahead(mapping, ra, file,
- page, offset, ra->ra_pages);
+ page_cache_async_ra(&ractl, folio, ra->ra_pages);
}
return fpin;
}
@@ -3040,7 +3096,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
* vma->vm_mm->mmap_lock must be held on entry.
*
* If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
- * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
+ * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap().
*
* If our return value does not have VM_FAULT_RETRY set, the mmap_lock
* has not been released.
@@ -3056,28 +3112,27 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
struct file *fpin = NULL;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- pgoff_t offset = vmf->pgoff;
- pgoff_t max_off;
- struct page *page;
+ pgoff_t max_idx, index = vmf->pgoff;
+ struct folio *folio;
vm_fault_t ret = 0;
bool mapping_locked = false;
- max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (unlikely(offset >= max_off))
+ max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(index >= max_idx))
return VM_FAULT_SIGBUS;
/*
* Do we have something in the page cache already?
*/
- page = find_get_page(mapping, offset);
- if (likely(page)) {
+ folio = filemap_get_folio(mapping, index);
+ if (likely(folio)) {
/*
* We found the page, so try async readahead before waiting for
* the lock.
*/
if (!(vmf->flags & FAULT_FLAG_TRIED))
- fpin = do_async_mmap_readahead(vmf, page);
- if (unlikely(!PageUptodate(page))) {
+ fpin = do_async_mmap_readahead(vmf, folio);
+ if (unlikely(!folio_test_uptodate(folio))) {
filemap_invalidate_lock_shared(mapping);
mapping_locked = true;
}
@@ -3089,17 +3144,17 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
fpin = do_sync_mmap_readahead(vmf);
retry_find:
/*
- * See comment in filemap_create_page() why we need
+ * See comment in filemap_create_folio() why we need
* invalidate_lock
*/
if (!mapping_locked) {
filemap_invalidate_lock_shared(mapping);
mapping_locked = true;
}
- page = pagecache_get_page(mapping, offset,
+ folio = __filemap_get_folio(mapping, index,
FGP_CREAT|FGP_FOR_MMAP,
vmf->gfp_mask);
- if (!page) {
+ if (!folio) {
if (fpin)
goto out_retry;
filemap_invalidate_unlock_shared(mapping);
@@ -3107,22 +3162,22 @@ retry_find:
}
}
- if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+ if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin))
goto out_retry;
/* Did it get truncated? */
- if (unlikely(compound_head(page)->mapping != mapping)) {
- unlock_page(page);
- put_page(page);
+ if (unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
+ folio_put(folio);
goto retry_find;
}
- VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
/*
* We have a locked page in the page cache, now we need to check
* that it's up-to-date. If not, it is going to be due to an error.
*/
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!folio_test_uptodate(folio))) {
/*
* The page was in cache and uptodate and now it is not.
* Strange but possible since we didn't hold the page lock all
@@ -3130,8 +3185,8 @@ retry_find:
* try again.
*/
if (!mapping_locked) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto retry_find;
}
goto page_not_uptodate;
@@ -3143,7 +3198,7 @@ retry_find:
* redo the fault.
*/
if (fpin) {
- unlock_page(page);
+ folio_unlock(folio);
goto out_retry;
}
if (mapping_locked)
@@ -3153,14 +3208,14 @@ retry_find:
* Found the page and have a reference on it.
* We must recheck i_size under page lock.
*/
- max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (unlikely(offset >= max_off)) {
- unlock_page(page);
- put_page(page);
+ max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(index >= max_idx)) {
+ folio_unlock(folio);
+ folio_put(folio);
return VM_FAULT_SIGBUS;
}
- vmf->page = page;
+ vmf->page = folio_file_page(folio, index);
return ret | VM_FAULT_LOCKED;
page_not_uptodate:
@@ -3171,10 +3226,10 @@ page_not_uptodate:
* and we need to check for errors.
*/
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- error = filemap_read_page(file, mapping, page);
+ error = filemap_read_folio(file, mapping, folio);
if (fpin)
goto out_retry;
- put_page(page);
+ folio_put(folio);
if (!error || error == AOP_TRUNCATED_PAGE)
goto retry_find;
@@ -3188,8 +3243,8 @@ out_retry:
* re-find the vma and come back and find our hopefully still populated
* page.
*/
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
if (mapping_locked)
filemap_invalidate_unlock_shared(mapping);
if (fpin)
@@ -3231,50 +3286,48 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
return false;
}
-static struct page *next_uptodate_page(struct page *page,
+static struct folio *next_uptodate_page(struct folio *folio,
struct address_space *mapping,
struct xa_state *xas, pgoff_t end_pgoff)
{
unsigned long max_idx;
do {
- if (!page)
+ if (!folio)
return NULL;
- if (xas_retry(xas, page))
+ if (xas_retry(xas, folio))
continue;
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- if (PageLocked(page))
+ if (folio_test_locked(folio))
continue;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
continue;
/* Has the page moved or been split? */
- if (unlikely(page != xas_reload(xas)))
- goto skip;
- if (!PageUptodate(page) || PageReadahead(page))
+ if (unlikely(folio != xas_reload(xas)))
goto skip;
- if (PageHWPoison(page))
+ if (!folio_test_uptodate(folio) || folio_test_readahead(folio))
goto skip;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto skip;
- if (page->mapping != mapping)
+ if (folio->mapping != mapping)
goto unlock;
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
if (xas->xa_index >= max_idx)
goto unlock;
- return page;
+ return folio;
unlock:
- unlock_page(page);
+ folio_unlock(folio);
skip:
- put_page(page);
- } while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
+ folio_put(folio);
+ } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL);
return NULL;
}
-static inline struct page *first_map_page(struct address_space *mapping,
+static inline struct folio *first_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
@@ -3282,7 +3335,7 @@ static inline struct page *first_map_page(struct address_space *mapping,
mapping, xas, end_pgoff);
}
-static inline struct page *next_map_page(struct address_space *mapping,
+static inline struct folio *next_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
@@ -3299,16 +3352,17 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff;
unsigned long addr;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *head, *page;
+ struct folio *folio;
+ struct page *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
vm_fault_t ret = 0;
rcu_read_lock();
- head = first_map_page(mapping, &xas, end_pgoff);
- if (!head)
+ folio = first_map_page(mapping, &xas, end_pgoff);
+ if (!folio)
goto out;
- if (filemap_map_pmd(vmf, head)) {
+ if (filemap_map_pmd(vmf, &folio->page)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3316,7 +3370,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
do {
- page = find_subpage(head, xas.xa_index);
+again:
+ page = folio_file_page(folio, xas.xa_index);
if (PageHWPoison(page))
goto unlock;
@@ -3337,12 +3392,21 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
do_set_pte(vmf, page, addr);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, addr, vmf->pte);
- unlock_page(head);
+ if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
+ xas.xa_index++;
+ folio_ref_inc(folio);
+ goto again;
+ }
+ folio_unlock(folio);
continue;
unlock:
- unlock_page(head);
- put_page(head);
- } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
+ if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
+ xas.xa_index++;
+ goto again;
+ }
+ folio_unlock(folio);
+ folio_put(folio);
+ } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL);
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
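
The loop above maps every suitable page of a large folio before advancing to the next cache entry; folio_more_pages() decides whether another subpage of the same folio still falls inside the fault-around window. Its definition is not part of this hunk, so the following is only a rough sketch of the check such a helper needs to make (hypothetical name, assumed behaviour):

	static inline bool example_folio_more_pages(struct folio *folio,
						    pgoff_t index, pgoff_t max)
	{
		/* Single-page and hugetlb folios have no further subpages to map here. */
		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
			return false;
		/* Stay inside the fault-around window... */
		if (index >= max)
			return false;
		/* ...and inside the folio itself. */
		return index < folio_next_index(folio) - 1;
	}
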
@@ -3354,24 +3418,24 @@ EXPORT_SYMBOL(filemap_map_pages);
vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
vm_fault_t ret = VM_FAULT_LOCKED;
sb_start_pagefault(mapping->host->i_sb);
file_update_time(vmf->vma->vm_file);
- lock_page(page);
- if (page->mapping != mapping) {
- unlock_page(page);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
ret = VM_FAULT_NOPAGE;
goto out;
}
/*
- * We mark the page dirty already here so that when freeze is in
+ * We mark the folio dirty already here so that when freeze is in
* progress, we are guaranteed that writeback during freezing will
- * see the dirty page and writeprotect it again.
+ * see the dirty folio and writeprotect it again.
*/
- set_page_dirty(page);
- wait_for_stable_page(page);
+ folio_mark_dirty(folio);
+ folio_wait_stable(folio);
out:
sb_end_pagefault(mapping->host->i_sb);
return ret;
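
Taken together, filemap_fault(), filemap_map_pages() and filemap_page_mkwrite() are what a filesystem's ->mmap() implementation wires into its vm_operations_struct; this mirrors the generic_file_vm_ops/generic_file_mmap pattern used elsewhere in this file. The names below are illustrative:

	static const struct vm_operations_struct example_file_vm_ops = {
		.fault		= filemap_fault,
		.map_pages	= filemap_map_pages,
		.page_mkwrite	= filemap_page_mkwrite,
	};

	static int example_file_mmap(struct file *file, struct vm_area_struct *vma)
	{
		file_accessed(file);
		vma->vm_ops = &example_file_vm_ops;
		return 0;
	}
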
@@ -3424,35 +3488,20 @@ EXPORT_SYMBOL(filemap_page_mkwrite);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);
-static struct page *wait_on_page_read(struct page *page)
-{
- if (!IS_ERR(page)) {
- wait_on_page_locked(page);
- if (!PageUptodate(page)) {
- put_page(page);
- page = ERR_PTR(-EIO);
- }
- }
- return page;
-}
-
-static struct page *do_read_cache_page(struct address_space *mapping,
- pgoff_t index,
- int (*filler)(void *, struct page *),
- void *data,
- gfp_t gfp)
+static struct folio *do_read_cache_folio(struct address_space *mapping,
+ pgoff_t index, filler_t filler, void *data, gfp_t gfp)
{
- struct page *page;
+ struct folio *folio;
int err;
repeat:
- page = find_get_page(mapping, index);
- if (!page) {
- page = __page_cache_alloc(gfp);
- if (!page)
+ folio = filemap_get_folio(mapping, index);
+ if (!folio) {
+ folio = filemap_alloc_folio(gfp, 0);
+ if (!folio)
return ERR_PTR(-ENOMEM);
- err = add_to_page_cache_lru(page, mapping, index, gfp);
+ err = filemap_add_folio(mapping, folio, index, gfp);
if (unlikely(err)) {
- put_page(page);
+ folio_put(folio);
if (err == -EEXIST)
goto repeat;
/* Presumably ENOMEM for xarray node */
@@ -3461,71 +3510,41 @@ repeat:
filler:
if (filler)
- err = filler(data, page);
+ err = filler(data, &folio->page);
else
- err = mapping->a_ops->readpage(data, page);
+ err = mapping->a_ops->readpage(data, &folio->page);
if (err < 0) {
- put_page(page);
+ folio_put(folio);
return ERR_PTR(err);
}
- page = wait_on_page_read(page);
- if (IS_ERR(page))
- return page;
+ folio_wait_locked(folio);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
+ return ERR_PTR(-EIO);
+ }
+
goto out;
}
- if (PageUptodate(page))
- goto out;
-
- /*
- * Page is not up to date and may be locked due to one of the following
- * case a: Page is being filled and the page lock is held
- * case b: Read/write error clearing the page uptodate status
- * case c: Truncation in progress (page locked)
- * case d: Reclaim in progress
- *
- * Case a, the page will be up to date when the page is unlocked.
- * There is no need to serialise on the page lock here as the page
- * is pinned so the lock gives no additional protection. Even if the
- * page is truncated, the data is still valid if PageUptodate as
- * it's a race vs truncate race.
- * Case b, the page will not be up to date
- * Case c, the page may be truncated but in itself, the data may still
- * be valid after IO completes as it's a read vs truncate race. The
- * operation must restart if the page is not uptodate on unlock but
- * otherwise serialising on page lock to stabilise the mapping gives
- * no additional guarantees to the caller as the page lock is
- * released before return.
- * Case d, similar to truncation. If reclaim holds the page lock, it
- * will be a race with remove_mapping that determines if the mapping
- * is valid on unlock but otherwise the data is valid and there is
- * no need to serialise with page lock.
- *
- * As the page lock gives no additional guarantee, we optimistically
- * wait on the page to be unlocked and check if it's up to date and
- * use the page if it is. Otherwise, the page lock is required to
- * distinguish between the different cases. The motivation is that we
- * avoid spurious serialisations and wakeups when multiple processes
- * wait on the same page for IO to complete.
- */
- wait_on_page_locked(page);
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
goto out;
- /* Distinguish between all the cases under the safety of the lock */
- lock_page(page);
+ if (!folio_trylock(folio)) {
+ folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ goto repeat;
+ }
- /* Case c or d, restart the operation */
- if (!page->mapping) {
- unlock_page(page);
- put_page(page);
+ /* Folio was truncated from mapping */
+ if (!folio->mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
/* Someone else locked and filled the page in a very small window */
- if (PageUptodate(page)) {
- unlock_page(page);
+ if (folio_test_uptodate(folio)) {
+ folio_unlock(folio);
goto out;
}
@@ -3535,16 +3554,16 @@ filler:
 * Clear the page error before the actual read; PG_error will be
 * set again if the read fails.
*/
- ClearPageError(page);
+ folio_clear_error(folio);
goto filler;
out:
- mark_page_accessed(page);
- return page;
+ folio_mark_accessed(folio);
+ return folio;
}
/**
- * read_cache_page - read into page cache, fill it if needed
+ * read_cache_folio - read into page cache, fill it if needed
* @mapping: the page's address_space
* @index: the page index
* @filler: function to perform the read
@@ -3559,10 +3578,27 @@ out:
*
* Return: up to date page on success, ERR_PTR() on failure.
*/
+struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index,
+ filler_t filler, void *data)
+{
+ return do_read_cache_folio(mapping, index, filler, data,
+ mapping_gfp_mask(mapping));
+}
+EXPORT_SYMBOL(read_cache_folio);
+
+static struct page *do_read_cache_page(struct address_space *mapping,
+ pgoff_t index, filler_t *filler, void *data, gfp_t gfp)
+{
+ struct folio *folio;
+
+ folio = do_read_cache_folio(mapping, index, filler, data, gfp);
+ if (IS_ERR(folio))
+ return &folio->page;
+ return folio_file_page(folio, index);
+}
+
struct page *read_cache_page(struct address_space *mapping,
- pgoff_t index,
- int (*filler)(void *, struct page *),
- void *data)
+ pgoff_t index, filler_t *filler, void *data)
{
return do_read_cache_page(mapping, index, filler, data,
mapping_gfp_mask(mapping));
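
A short usage sketch for the new read_cache_folio() entry point (hypothetical helper name; with a NULL filler the data pointer is handed to ->readpage() as the file argument, as in do_read_cache_folio() above):

	static struct folio *example_read_folio(struct file *file, pgoff_t index)
	{
		struct address_space *mapping = file->f_mapping;
		struct folio *folio;

		/* NULL filler: fall back to mapping->a_ops->readpage(file, page). */
		folio = read_cache_folio(mapping, index, NULL, file);
		if (IS_ERR(folio))
			return folio;

		/* Uptodate, unlocked, and holding a reference the caller must drop. */
		return folio;
	}
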
@@ -3922,33 +3958,32 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
EXPORT_SYMBOL(generic_file_write_iter);
/**
- * try_to_release_page() - release old fs-specific metadata on a page
+ * filemap_release_folio() - Release fs-specific metadata on a folio.
+ * @folio: The folio which the kernel is trying to free.
+ * @gfp: Memory allocation flags (and I/O mode).
*
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
+ * The address_space is trying to release any data attached to a folio
+ * (presumably at folio->private).
*
- * The address_space is to try to release any data against the page
- * (presumably at page->private).
+ * This will also be called if the private_2 flag is set on the folio,
+ * indicating that it has other metadata associated with it.
*
- * This may also be called if PG_fscache is set on a page, indicating that the
- * page is known to the local caching routines.
+ * The @gfp argument specifies whether I/O may be performed to release
+ * this folio (__GFP_IO), and whether the call may block
+ * (__GFP_RECLAIM & __GFP_FS).
*
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
- *
- * Return: %1 if the release was successful, otherwise return zero.
+ * Return: %true if the release was successful, otherwise %false.
*/
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
+bool filemap_release_folio(struct folio *folio, gfp_t gfp)
{
- struct address_space * const mapping = page->mapping;
+ struct address_space * const mapping = folio->mapping;
- BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
- return 0;
+ BUG_ON(!folio_test_locked(folio));
+ if (folio_test_writeback(folio))
+ return false;
if (mapping && mapping->a_ops->releasepage)
- return mapping->a_ops->releasepage(page, gfp_mask);
- return try_to_free_buffers(page);
+ return mapping->a_ops->releasepage(&folio->page, gfp);
+ return try_to_free_buffers(&folio->page);
}
-
-EXPORT_SYMBOL(try_to_release_page);
+EXPORT_SYMBOL(filemap_release_folio);
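
A hedged sketch of a caller of filemap_release_folio(); the helper name is hypothetical, and the real users are the reclaim and truncation paths. The folio must be locked, matching the BUG_ON above:

	static bool example_strip_folio_private(struct folio *folio, gfp_t gfp)
	{
		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);

		/* Nothing attached: nothing for the filesystem to release. */
		if (!folio_test_private(folio) && !folio_test_private_2(folio))
			return true;

		/* Calls ->releasepage() if the fs provides one, else frees buffer heads. */
		return filemap_release_folio(folio, gfp);
	}
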