aboutsummaryrefslogtreecommitdiffstats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c1038
1 files changed, 484 insertions, 554 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index daa0e23a6ee6..2fd9b2f24025 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,99 +121,97 @@
*/
static void page_cache_delete(struct address_space *mapping,
- struct page *page, void *shadow)
+ struct folio *folio, void *shadow)
{
- XA_STATE(xas, &mapping->i_pages, page->index);
- unsigned int nr = 1;
+ XA_STATE(xas, &mapping->i_pages, folio->index);
+ long nr = 1;
mapping_set_update(&xas, mapping);
/* hugetlb pages are represented by a single entry in the xarray */
- if (!PageHuge(page)) {
- xas_set_order(&xas, page->index, compound_order(page));
- nr = compound_nr(page);
+ if (!folio_test_hugetlb(folio)) {
+ xas_set_order(&xas, folio->index, folio_order(folio));
+ nr = folio_nr_pages(folio);
}
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(nr != 1 && shadow, page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
xas_store(&xas, shadow);
xas_init_marks(&xas);
- page->mapping = NULL;
+ folio->mapping = NULL;
/* Leave page->index set: truncation lookup relies upon it */
mapping->nrpages -= nr;
}
-static void unaccount_page_cache_page(struct address_space *mapping,
- struct page *page)
+static void filemap_unaccount_folio(struct address_space *mapping,
+ struct folio *folio)
{
- int nr;
+ long nr;
/*
* if we're uptodate, flush out into the cleancache, otherwise
* invalidate any existing cleancache entries. We can't leave
* stale data around in the cleancache once our page is gone
*/
- if (PageUptodate(page) && PageMappedToDisk(page))
- cleancache_put_page(page);
+ if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio))
+ cleancache_put_page(&folio->page);
else
- cleancache_invalidate_page(mapping, page);
+ cleancache_invalidate_page(mapping, &folio->page);
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(page_mapped(page), page);
- if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
+ VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
+ if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) {
int mapcount;
pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
- current->comm, page_to_pfn(page));
- dump_page(page, "still mapped when deleted");
+ current->comm, folio_pfn(folio));
+ dump_page(&folio->page, "still mapped when deleted");
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
- mapcount = page_mapcount(page);
+ mapcount = page_mapcount(&folio->page);
if (mapping_exiting(mapping) &&
- page_count(page) >= mapcount + 2) {
+ folio_ref_count(folio) >= mapcount + 2) {
/*
* All vmas have already been torn down, so it's
- * a good bet that actually the page is unmapped,
+ * a good bet that actually the folio is unmapped,
* and we'd prefer not to leak it: if we're wrong,
* some other bad page check should catch it later.
*/
- page_mapcount_reset(page);
- page_ref_sub(page, mapcount);
+ page_mapcount_reset(&folio->page);
+ folio_ref_sub(folio, mapcount);
}
}
- /* hugetlb pages do not participate in page cache accounting. */
- if (PageHuge(page))
+ /* hugetlb folios do not participate in page cache accounting. */
+ if (folio_test_hugetlb(folio))
return;
- nr = thp_nr_pages(page);
+ nr = folio_nr_pages(folio);
- __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
- if (PageSwapBacked(page)) {
- __mod_lruvec_page_state(page, NR_SHMEM, -nr);
- if (PageTransHuge(page))
- __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr);
- } else if (PageTransHuge(page)) {
- __mod_lruvec_page_state(page, NR_FILE_THPS, -nr);
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
+ if (folio_test_swapbacked(folio)) {
+ __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
+ if (folio_test_pmd_mappable(folio))
+ __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr);
+ } else if (folio_test_pmd_mappable(folio)) {
+ __lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr);
filemap_nr_thps_dec(mapping);
}
/*
- * At this point page must be either written or cleaned by
- * truncate. Dirty page here signals a bug and loss of
+ * At this point folio must be either written or cleaned by
+ * truncate. Dirty folio here signals a bug and loss of
* unwritten data.
*
- * This fixes dirty accounting after removing the page entirely
- * but leaves PageDirty set: it has no effect for truncated
- * page and anyway will be cleared before returning page into
+ * This fixes dirty accounting after removing the folio entirely
+ * but leaves the dirty flag set: it has no effect for truncated
+ * folio and anyway will be cleared before returning folio to
* buddy allocator.
*/
- if (WARN_ON_ONCE(PageDirty(page)))
- account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
+ if (WARN_ON_ONCE(folio_test_dirty(folio)))
+ folio_account_cleaned(folio, mapping,
+ inode_to_wb(mapping->host));
}
/*
@@ -221,87 +219,83 @@ static void unaccount_page_cache_page(struct address_space *mapping,
* sure the page is locked and that nobody else uses it - or that usage
* is safe. The caller must hold the i_pages lock.
*/
-void __delete_from_page_cache(struct page *page, void *shadow)
+void __filemap_remove_folio(struct folio *folio, void *shadow)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
- trace_mm_filemap_delete_from_page_cache(page);
-
- unaccount_page_cache_page(mapping, page);
- page_cache_delete(mapping, page, shadow);
+ trace_mm_filemap_delete_from_page_cache(folio);
+ filemap_unaccount_folio(mapping, folio);
+ page_cache_delete(mapping, folio, shadow);
}
-static void page_cache_free_page(struct address_space *mapping,
- struct page *page)
+void filemap_free_folio(struct address_space *mapping, struct folio *folio)
{
void (*freepage)(struct page *);
freepage = mapping->a_ops->freepage;
if (freepage)
- freepage(page);
+ freepage(&folio->page);
- if (PageTransHuge(page) && !PageHuge(page)) {
- page_ref_sub(page, thp_nr_pages(page));
- VM_BUG_ON_PAGE(page_count(page) <= 0, page);
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio)) {
+ folio_ref_sub(folio, folio_nr_pages(folio));
+ VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio);
} else {
- put_page(page);
+ folio_put(folio);
}
}
/**
- * delete_from_page_cache - delete page from page cache
- * @page: the page which the kernel is trying to remove from page cache
+ * filemap_remove_folio - Remove folio from page cache.
+ * @folio: The folio.
*
- * This must be called only on pages that have been verified to be in the page
- * cache and locked. It will never put the page into the free list, the caller
- * has a reference on the page.
+ * This must be called only on folios that are locked and have been
+ * verified to be in the page cache. It will never put the folio into
+ * the free list because the caller has a reference on the page.
*/
-void delete_from_page_cache(struct page *page)
+void filemap_remove_folio(struct folio *folio)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio->mapping;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
- __delete_from_page_cache(page, NULL);
+ __filemap_remove_folio(folio, NULL);
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
- page_cache_free_page(mapping, page);
+ filemap_free_folio(mapping, folio);
}
-EXPORT_SYMBOL(delete_from_page_cache);
/*
- * page_cache_delete_batch - delete several pages from page cache
- * @mapping: the mapping to which pages belong
- * @pvec: pagevec with pages to delete
+ * page_cache_delete_batch - delete several folios from page cache
+ * @mapping: the mapping to which folios belong
+ * @fbatch: batch of folios to delete
*
- * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
- * It tolerates holes in @pvec (mapping entries at those indices are not
- * modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * The function walks over mapping->i_pages and removes folios passed in
+ * @fbatch from the mapping. The function expects @fbatch to be sorted
+ * by page index and is optimised for it to be dense.
+ * It tolerates holes in @fbatch (mapping entries at those indices are not
+ * modified).
*
* The function expects the i_pages lock to be held.
*/
static void page_cache_delete_batch(struct address_space *mapping,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
- XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
- int total_pages = 0;
+ XA_STATE(xas, &mapping->i_pages, fbatch->folios[0]->index);
+ long total_pages = 0;
int i = 0;
- struct page *page;
+ struct folio *folio;
mapping_set_update(&xas, mapping);
- xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec))
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ if (i >= folio_batch_count(fbatch))
break;
/* A swap/dax/shadow entry got inserted? Skip it. */
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
/*
* A page got inserted in our range? Skip it. We have our
@@ -310,54 +304,48 @@ static void page_cache_delete_batch(struct address_space *mapping,
* means our page has been removed, which shouldn't be
* possible because we're holding the PageLock.
*/
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
- page);
+ if (folio != fbatch->folios[i]) {
+ VM_BUG_ON_FOLIO(folio->index >
+ fbatch->folios[i]->index, folio);
continue;
}
- WARN_ON_ONCE(!PageLocked(page));
+ WARN_ON_ONCE(!folio_test_locked(folio));
- if (page->index == xas.xa_index)
- page->mapping = NULL;
- /* Leave page->index set: truncation lookup relies on it */
+ folio->mapping = NULL;
+ /* Leave folio->index set: truncation lookup relies on it */
- /*
- * Move to the next page in the vector if this is a regular
- * page or the index is of the last sub-page of this compound
- * page.
- */
- if (page->index + compound_nr(page) - 1 == xas.xa_index)
- i++;
+ i++;
xas_store(&xas, NULL);
- total_pages++;
+ total_pages += folio_nr_pages(folio);
}
mapping->nrpages -= total_pages;
}
void delete_from_page_cache_batch(struct address_space *mapping,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
int i;
- if (!pagevec_count(pvec))
+ if (!folio_batch_count(fbatch))
return;
spin_lock(&mapping->host->i_lock);
xa_lock_irq(&mapping->i_pages);
- for (i = 0; i < pagevec_count(pvec); i++) {
- trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+ for (i = 0; i < folio_batch_count(fbatch); i++) {
+ struct folio *folio = fbatch->folios[i];
- unaccount_page_cache_page(mapping, pvec->pages[i]);
+ trace_mm_filemap_delete_from_page_cache(folio);
+ filemap_unaccount_folio(mapping, folio);
}
- page_cache_delete_batch(mapping, pvec);
+ page_cache_delete_batch(mapping, fbatch);
xa_unlock_irq(&mapping->i_pages);
if (mapping_shrinkable(mapping))
inode_add_lru(mapping->host);
spin_unlock(&mapping->host->i_lock);
- for (i = 0; i < pagevec_count(pvec); i++)
- page_cache_free_page(mapping, pvec->pages[i]);
+ for (i = 0; i < folio_batch_count(fbatch); i++)
+ filemap_free_folio(mapping, fbatch->folios[i]);
}
int filemap_check_errors(struct address_space *mapping)
@@ -646,8 +634,8 @@ static bool mapping_needs_writeback(struct address_space *mapping)
return mapping->nrpages;
}
-static bool filemap_range_has_writeback(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte)
+bool filemap_range_has_writeback(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
{
XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
pgoff_t max = end_byte >> PAGE_SHIFT;
@@ -667,34 +655,8 @@ static bool filemap_range_has_writeback(struct address_space *mapping,
}
rcu_read_unlock();
return page != NULL;
-
-}
-
-/**
- * filemap_range_needs_writeback - check if range potentially needs writeback
- * @mapping: address space within which to check
- * @start_byte: offset in bytes where the range starts
- * @end_byte: offset in bytes where the range ends (inclusive)
- *
- * Find at least one page in the range supplied, usually used to check if
- * direct writing in this range will trigger a writeback. Used by O_DIRECT
- * read/write with IOCB_NOWAIT, to see if the caller needs to do
- * filemap_write_and_wait_range() before proceeding.
- *
- * Return: %true if the caller should do filemap_write_and_wait_range() before
- * doing O_DIRECT to a page in this range, %false otherwise.
- */
-bool filemap_range_needs_writeback(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte)
-{
- if (!mapping_needs_writeback(mapping))
- return false;
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
- !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
- return false;
- return filemap_range_has_writeback(mapping, start_byte, end_byte);
}
-EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+EXPORT_SYMBOL_GPL(filemap_range_has_writeback);
/**
* filemap_write_and_wait_range - write out & wait on a file range
@@ -959,7 +921,7 @@ unlock:
goto error;
}
- trace_mm_filemap_add_to_page_cache(&folio->page);
+ trace_mm_filemap_add_to_page_cache(folio);
return 0;
error:
folio->mapping = NULL;
@@ -1259,10 +1221,10 @@ enum behavior {
* __folio_lock() waiting on then setting PG_locked.
*/
SHARED, /* Hold ref to page and check the bit when woken, like
- * wait_on_page_writeback() waiting on PG_writeback.
+ * folio_wait_writeback() waiting on PG_writeback.
*/
DROP, /* Drop ref to page before wait, no check when woken,
- * like put_and_wait_on_page_locked() on PG_locked.
+ * like folio_put_wait_locked() on PG_locked.
*/
};
@@ -1439,22 +1401,21 @@ int folio_wait_bit_killable(struct folio *folio, int bit_nr)
EXPORT_SYMBOL(folio_wait_bit_killable);
/**
- * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
- * @page: The page to wait for.
+ * folio_put_wait_locked - Drop a reference and wait for it to be unlocked
+ * @folio: The folio to wait for.
* @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc).
*
- * The caller should hold a reference on @page. They expect the page to
+ * The caller should hold a reference on @folio. They expect the page to
* become unlocked relatively soon, but do not wish to hold up migration
- * (for example) by holding the reference while waiting for the page to
+ * (for example) by holding the reference while waiting for the folio to
* come unlocked. After this function returns, the caller should not
- * dereference @page.
+ * dereference @folio.
*
- * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal.
+ * Return: 0 if the folio was unlocked or -EINTR if interrupted by a signal.
*/
-int put_and_wait_on_page_locked(struct page *page, int state)
+int folio_put_wait_locked(struct folio *folio, int state)
{
- return folio_wait_bit_common(page_folio(page), PG_locked, state,
- DROP);
+ return folio_wait_bit_common(folio, PG_locked, state, DROP);
}
/**
@@ -1979,37 +1940,36 @@ no_page:
}
EXPORT_SYMBOL(__filemap_get_folio);
-static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
+static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,
xa_mark_t mark)
{
- struct page *page;
+ struct folio *folio;
retry:
if (mark == XA_PRESENT)
- page = xas_find(xas, max);
+ folio = xas_find(xas, max);
else
- page = xas_find_marked(xas, max, mark);
+ folio = xas_find_marked(xas, max, mark);
- if (xas_retry(xas, page))
+ if (xas_retry(xas, folio))
goto retry;
/*
* A shadow entry of a recently evicted page, a swap
* entry from shmem/tmpfs or a DAX entry. Return it
* without attempting to raise page count.
*/
- if (!page || xa_is_value(page))
- return page;
+ if (!folio || xa_is_value(folio))
+ return folio;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
goto reset;
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(xas))) {
- put_page(page);
+ if (unlikely(folio != xas_reload(xas))) {
+ folio_put(folio);
goto reset;
}
- return page;
+ return folio;
reset:
xas_reset(xas);
goto retry;
@@ -2020,56 +1980,36 @@ reset:
* @mapping: The address_space to search
* @start: The starting page cache index
* @end: The final page index (inclusive).
- * @pvec: Where the resulting entries are placed.
+ * @fbatch: Where the resulting entries are placed.
* @indices: The cache indices corresponding to the entries in @entries
*
* find_get_entries() will search for and return a batch of entries in
- * the mapping. The entries are placed in @pvec. find_get_entries()
- * takes a reference on any actual pages it returns.
+ * the mapping. The entries are placed in @fbatch. find_get_entries()
+ * takes a reference on any actual folios it returns.
*
- * The search returns a group of mapping-contiguous page cache entries
- * with ascending indexes. There may be holes in the indices due to
- * not-present pages.
+ * The entries have ascending indexes. The indices may not be consecutive
+ * due to not-present entries or large folios.
*
- * Any shadow entries of evicted pages, or swap entries from
+ * Any shadow entries of evicted folios, or swap entries from
* shmem/tmpfs, are included in the returned array.
*
- * If it finds a Transparent Huge Page, head or tail, find_get_entries()
- * stops at that page: the caller is likely to have a better way to handle
- * the compound page as a whole, and then skip its extent, than repeatedly
- * calling find_get_entries() to return all its tails.
- *
- * Return: the number of pages and shadow entries which were found.
+ * Return: The number of entries which were found.
*/
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
- pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+ pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
XA_STATE(xas, &mapping->i_pages, start);
- struct page *page;
- unsigned int ret = 0;
- unsigned nr_entries = PAGEVEC_SIZE;
+ struct folio *folio;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
- /*
- * Terminate early on finding a THP, to allow the caller to
- * handle it all at once; but continue if this is hugetlbfs.
- */
- if (!xa_is_value(page) && PageTransHuge(page) &&
- !PageHuge(page)) {
- page = find_subpage(page, xas.xa_index);
- nr_entries = ret + 1;
- }
-
- indices[ret] = xas.xa_index;
- pvec->pages[ret] = page;
- if (++ret == nr_entries)
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
+ indices[fbatch->nr] = xas.xa_index;
+ if (!folio_batch_add(fbatch, folio))
break;
}
rcu_read_unlock();
- pvec->nr = ret;
- return ret;
+ return folio_batch_count(fbatch);
}
/**
@@ -2077,63 +2017,64 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
* @mapping: The address_space to search.
* @start: The starting page cache index.
* @end: The final page index (inclusive).
- * @pvec: Where the resulting entries are placed.
- * @indices: The cache indices of the entries in @pvec.
+ * @fbatch: Where the resulting entries are placed.
+ * @indices: The cache indices of the entries in @fbatch.
*
* find_lock_entries() will return a batch of entries from @mapping.
- * Swap, shadow and DAX entries are included. Pages are returned
- * locked and with an incremented refcount. Pages which are locked by
- * somebody else or under writeback are skipped. Only the head page of
- * a THP is returned. Pages which are partially outside the range are
- * not returned.
+ * Swap, shadow and DAX entries are included. Folios are returned
+ * locked and with an incremented refcount. Folios which are locked
+ * by somebody else or under writeback are skipped. Folios which are
+ * partially outside the range are not returned.
*
* The entries have ascending indexes. The indices may not be consecutive
- * due to not-present entries, THP pages, pages which could not be locked
- * or pages under writeback.
+ * due to not-present entries, large folios, folios which could not be
+ * locked or folios under writeback.
*
* Return: The number of entries which were found.
*/
unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
- pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+ pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
{
XA_STATE(xas, &mapping->i_pages, start);
- struct page *page;
+ struct folio *folio;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
- if (!xa_is_value(page)) {
- if (page->index < start)
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
+ if (!xa_is_value(folio)) {
+ if (folio->index < start)
goto put;
- if (page->index + thp_nr_pages(page) - 1 > end)
+ if (folio->index + folio_nr_pages(folio) - 1 > end)
goto put;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto put;
- if (page->mapping != mapping || PageWriteback(page))
+ if (folio->mapping != mapping ||
+ folio_test_writeback(folio))
goto unlock;
- VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index),
- page);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
+ folio);
}
- indices[pvec->nr] = xas.xa_index;
- if (!pagevec_add(pvec, page))
+ indices[fbatch->nr] = xas.xa_index;
+ if (!folio_batch_add(fbatch, folio))
break;
- goto next;
+ continue;
unlock:
- unlock_page(page);
+ folio_unlock(folio);
put:
- put_page(page);
-next:
- if (!xa_is_value(page) && PageTransHuge(page)) {
- unsigned int nr_pages = thp_nr_pages(page);
-
- /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */
- xas_set(&xas, page->index + nr_pages);
- if (xas.xa_index < nr_pages)
- break;
- }
+ folio_put(folio);
}
rcu_read_unlock();
- return pagevec_count(pvec);
+ return folio_batch_count(fbatch);
+}
+
+static inline
+bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
+{
+ if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+ return false;
+ if (index >= max)
+ return false;
+ return index < folio->index + folio_nr_pages(folio) - 1;
}
/**
@@ -2162,23 +2103,29 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, *start);
- struct page *page;
+ struct folio *folio;
unsigned ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
/* Skip over shadow, swap and DAX entries */
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- pages[ret] = find_subpage(page, xas.xa_index);
+again:
+ pages[ret] = folio_file_page(folio, xas.xa_index);
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
+ if (folio_more_pages(folio, xas.xa_index, end)) {
+ xas.xa_index++;
+ folio_ref_inc(folio);
+ goto again;
+ }
}
/*
@@ -2213,36 +2160,41 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
unsigned int nr_pages, struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, index);
- struct page *page;
+ struct folio *folio;
unsigned int ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- for (page = xas_load(&xas); page; page = xas_next(&xas)) {
- if (xas_retry(&xas, page))
+ for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+ if (xas_retry(&xas, folio))
continue;
/*
* If the entry has been swapped out, we can stop looking.
* No current caller is looking for DAX entries.
*/
- if (xa_is_value(page))
+ if (xa_is_value(folio))
break;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
goto retry;
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(&xas)))
+ if (unlikely(folio != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+again:
+ pages[ret] = folio_file_page(folio, xas.xa_index);
if (++ret == nr_pages)
break;
+ if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) {
+ xas.xa_index++;
+ folio_ref_inc(folio);
+ goto again;
+ }
continue;
put_page:
- put_page(page);
+ folio_put(folio);
retry:
xas_reset(&xas);
}
@@ -2271,25 +2223,25 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, *index);
- struct page *page;
+ struct folio *folio;
unsigned ret = 0;
if (unlikely(!nr_pages))
return 0;
rcu_read_lock();
- while ((page = find_get_entry(&xas, end, tag))) {
+ while ((folio = find_get_entry(&xas, end, tag))) {
/*
* Shadow entries should never be tagged, but this iteration
* is lockless so there is a window for page reclaim to evict
* a page we saw tagged. Skip over it.
*/
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- pages[ret] = page;
+ pages[ret] = &folio->page;
if (++ret == nr_pages) {
- *index = page->index + thp_nr_pages(page);
+ *index = folio->index + folio_nr_pages(folio);
goto out;
}
}
@@ -2332,52 +2284,50 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
}
/*
- * filemap_get_read_batch - Get a batch of pages for read
+ * filemap_get_read_batch - Get a batch of folios for read
*
- * Get a batch of pages which represent a contiguous range of bytes
- * in the file. No tail pages will be returned. If @index is in the
- * middle of a THP, the entire THP will be returned. The last page in
- * the batch may have Readahead set or be not Uptodate so that the
- * caller can take the appropriate action.
+ * Get a batch of folios which represent a contiguous range of bytes in
+ * the file. No exceptional entries will be returned. If @index is in
+ * the middle of a folio, the entire folio will be returned. The last
+ * folio in the batch may have the readahead flag set or the uptodate flag
+ * clear so that the caller can take the appropriate action.
*/
static void filemap_get_read_batch(struct address_space *mapping,
- pgoff_t index, pgoff_t max, struct pagevec *pvec)
+ pgoff_t index, pgoff_t max, struct folio_batch *fbatch)
{
XA_STATE(xas, &mapping->i_pages, index);
- struct page *head;
+ struct folio *folio;
rcu_read_lock();
- for (head = xas_load(&xas); head; head = xas_next(&xas)) {
- if (xas_retry(&xas, head))
+ for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
+ if (xas_retry(&xas, folio))
continue;
- if (xas.xa_index > max || xa_is_value(head))
+ if (xas.xa_index > max || xa_is_value(folio))
break;
- if (!page_cache_get_speculative(head))
+ if (!folio_try_get_rcu(folio))
goto retry;
- /* Has the page moved or been split? */
- if (unlikely(head != xas_reload(&xas)))
- goto put_page;
+ if (unlikely(folio != xas_reload(&xas)))
+ goto put_folio;
- if (!pagevec_add(pvec, head))
+ if (!folio_batch_add(fbatch, folio))
break;
- if (!PageUptodate(head))
+ if (!folio_test_uptodate(folio))
break;
- if (PageReadahead(head))
+ if (folio_test_readahead(folio))
break;
- xas.xa_index = head->index + thp_nr_pages(head) - 1;
- xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
+ xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1);
continue;
-put_page:
- put_page(head);
+put_folio:
+ folio_put(folio);
retry:
xas_reset(&xas);
}
rcu_read_unlock();
}
-static int filemap_read_page(struct file *file, struct address_space *mapping,
- struct page *page)
+static int filemap_read_folio(struct file *file, struct address_space *mapping,
+ struct folio *folio)
{
int error;
@@ -2386,52 +2336,51 @@ static int filemap_read_page(struct file *file, struct address_space *mapping,
* eg. multipath errors. PG_error will be set again if readpage
* fails.
*/
- ClearPageError(page);
+ folio_clear_error(folio);
/* Start the actual read. The read will unlock the page. */
- error = mapping->a_ops->readpage(file, page);
+ error = mapping->a_ops->readpage(file, &folio->page);
if (error)
return error;
- error = wait_on_page_locked_killable(page);
+ error = folio_wait_locked_killable(folio);
if (error)
return error;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
return 0;
shrink_readahead_size_eio(&file->f_ra);
return -EIO;
}
static bool filemap_range_uptodate(struct address_space *mapping,
- loff_t pos, struct iov_iter *iter, struct page *page)
+ loff_t pos, struct iov_iter *iter, struct folio *folio)
{
int count;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
return true;
/* pipes can't handle partially uptodate pages */
if (iov_iter_is_pipe(iter))
return false;
if (!mapping->a_ops->is_partially_uptodate)
return false;
- if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page)))
+ if (mapping->host->i_blkbits >= folio_shift(folio))
return false;
count = iter->count;
- if (page_offset(page) > pos) {
- count -= page_offset(page) - pos;
+ if (folio_pos(folio) > pos) {
+ count -= folio_pos(folio) - pos;
pos = 0;
} else {
- pos -= page_offset(page);
+ pos -= folio_pos(folio);
}
- return mapping->a_ops->is_partially_uptodate(page, pos, count);
+ return mapping->a_ops->is_partially_uptodate(&folio->page, pos, count);
}
static int filemap_update_page(struct kiocb *iocb,
struct address_space *mapping, struct iov_iter *iter,
- struct page *page)
+ struct folio *folio)
{
- struct folio *folio = page_folio(page);
int error;
if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -2447,7 +2396,11 @@ static int filemap_update_page(struct kiocb *iocb,
goto unlock_mapping;
if (!(iocb->ki_flags & IOCB_WAITQ)) {
filemap_invalidate_unlock_shared(mapping);
- put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE);
+ /*
+ * This is where we usually end up waiting for a
+ * previously submitted readahead to finish.
+ */
+ folio_put_wait_locked(folio, TASK_KILLABLE);
return AOP_TRUNCATED_PAGE;
}
error = __folio_lock_async(folio, iocb->ki_waitq);
@@ -2460,14 +2413,14 @@ static int filemap_update_page(struct kiocb *iocb,
goto unlock;
error = 0;
- if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page))
+ if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, folio))
goto unlock;
error = -EAGAIN;
if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
goto unlock;
- error = filemap_read_page(iocb->ki_filp, mapping, &folio->page);
+ error = filemap_read_folio(iocb->ki_filp, mapping, folio);
goto unlock_mapping;
unlock:
folio_unlock(folio);
@@ -2478,70 +2431,72 @@ unlock_mapping:
return error;
}
-static int filemap_create_page(struct file *file,
+static int filemap_create_folio(struct file *file,
struct address_space *mapping, pgoff_t index,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
- struct page *page;
+ struct folio *folio;
int error;
- page = page_cache_alloc(mapping);
- if (!page)
+ folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+ if (!folio)
return -ENOMEM;
/*
- * Protect against truncate / hole punch. Grabbing invalidate_lock here
- * assures we cannot instantiate and bring uptodate new pagecache pages
- * after evicting page cache during truncate and before actually
- * freeing blocks. Note that we could release invalidate_lock after
- * inserting the page into page cache as the locked page would then be
- * enough to synchronize with hole punching. But there are code paths
- * such as filemap_update_page() filling in partially uptodate pages or
- * ->readpages() that need to hold invalidate_lock while mapping blocks
- * for IO so let's hold the lock here as well to keep locking rules
- * simple.
+ * Protect against truncate / hole punch. Grabbing invalidate_lock
+ * here assures we cannot instantiate and bring uptodate new
+ * pagecache folios after evicting page cache during truncate
+ * and before actually freeing blocks. Note that we could
+ * release invalidate_lock after inserting the folio into
+ * the page cache as the locked folio would then be enough to
+ * synchronize with hole punching. But there are code paths
+ * such as filemap_update_page() filling in partially uptodate
+ * pages or ->readpages() that need to hold invalidate_lock
+ * while mapping blocks for IO so let's hold the lock here as
+ * well to keep locking rules simple.
*/
filemap_invalidate_lock_shared(mapping);
- error = add_to_page_cache_lru(page, mapping, index,
+ error = filemap_add_folio(mapping, folio, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error == -EEXIST)
error = AOP_TRUNCATED_PAGE;
if (error)
goto error;
- error = filemap_read_page(file, mapping, page);
+ error = filemap_read_folio(file, mapping, folio);
if (error)
goto error;
filemap_invalidate_unlock_shared(mapping);
- pagevec_add(pvec, page);
+ folio_batch_add(fbatch, folio);
return 0;
error:
filemap_invalidate_unlock_shared(mapping);
- put_page(page);
+ folio_put(folio);
return error;
}
static int filemap_readahead(struct kiocb *iocb, struct file *file,
- struct address_space *mapping, struct page *page,
+ struct address_space *mapping, struct folio *folio,
pgoff_t last_index)
{
+ DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, folio->index);
+
if (iocb->ki_flags & IOCB_NOIO)
return -EAGAIN;
- page_cache_async_readahead(mapping, &file->f_ra, file, page,
- page->index, last_index - page->index);
+ page_cache_async_ra(&ractl, folio, last_index - folio->index);
return 0;
}
static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
- struct pagevec *pvec)
+ struct folio_batch *fbatch)
{
struct file *filp = iocb->ki_filp;
struct address_space *mapping = filp->f_mapping;
struct file_ra_state *ra = &filp->f_ra;
pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
pgoff_t last_index;
- struct page *page;
+ struct folio *folio;
int err = 0;
last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
@@ -2549,34 +2504,35 @@ retry:
if (fatal_signal_pending(current))
return -EINTR;
- filemap_get_read_batch(mapping, index, last_index, pvec);
- if (!pagevec_count(pvec)) {
+ filemap_get_read_batch(mapping, index, last_index, fbatch);
+ if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & IOCB_NOIO)
return -EAGAIN;
page_cache_sync_readahead(mapping, ra, filp, index,
last_index - index);
- filemap_get_read_batch(mapping, index, last_index, pvec);
+ filemap_get_read_batch(mapping, index, last_index, fbatch);
}
- if (!pagevec_count(pvec)) {
+ if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
return -EAGAIN;
- err = filemap_create_page(filp, mapping,
- iocb->ki_pos >> PAGE_SHIFT, pvec);
+ err = filemap_create_folio(filp, mapping,
+ iocb->ki_pos >> PAGE_SHIFT, fbatch);
if (err == AOP_TRUNCATED_PAGE)
goto retry;
return err;
}
- page = pvec->pages[pagevec_count(pvec) - 1];
- if (PageReadahead(page)) {
- err = filemap_readahead(iocb, filp, mapping, page, last_index);
+ folio = fbatch->folios[folio_batch_count(fbatch) - 1];
+ if (folio_test_readahead(folio)) {
+ err = filemap_readahead(iocb, filp, mapping, folio, last_index);
if (err)
goto err;
}
- if (!PageUptodate(page)) {
- if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1)
+ if (!folio_test_uptodate(folio)) {
+ if ((iocb->ki_flags & IOCB_WAITQ) &&
+ folio_batch_count(fbatch) > 1)
iocb->ki_flags |= IOCB_NOWAIT;
- err = filemap_update_page(iocb, mapping, iter, page);
+ err = filemap_update_page(iocb, mapping, iter, folio);
if (err)
goto err;
}
@@ -2584,8 +2540,8 @@ retry:
return 0;
err:
if (err < 0)
- put_page(page);
- if (likely(--pvec->nr))
+ folio_put(folio);
+ if (likely(--fbatch->nr))
return 0;
if (err == AOP_TRUNCATED_PAGE)
goto retry;
@@ -2612,7 +2568,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
struct file_ra_state *ra = &filp->f_ra;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
- struct pagevec pvec;
+ struct folio_batch fbatch;
int i, error = 0;
bool writably_mapped;
loff_t isize, end_offset;
@@ -2623,7 +2579,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
do {
cond_resched();
@@ -2639,7 +2595,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
break;
- error = filemap_get_pages(iocb, iter, &pvec);
+ error = filemap_get_pages(iocb, iter, &fbatch);
if (error < 0)
break;
@@ -2653,7 +2609,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
*/
isize = i_size_read(inode);
if (unlikely(iocb->ki_pos >= isize))
- goto put_pages;
+ goto put_folios;
end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
/*
@@ -2668,33 +2624,29 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
*/
if (iocb->ki_pos >> PAGE_SHIFT !=
ra->prev_pos >> PAGE_SHIFT)
- mark_page_accessed(pvec.pages[0]);
+ folio_mark_accessed(fbatch.folios[0]);
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- size_t page_size = thp_size(page);
- size_t offset = iocb->ki_pos & (page_size - 1);
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ size_t fsize = folio_size(folio);
+ size_t offset = iocb->ki_pos & (fsize - 1);
size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
- page_size - offset);
+ fsize - offset);
size_t copied;
- if (end_offset < page_offset(page))
+ if (end_offset < folio_pos(folio))
break;
if (i > 0)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
/*
- * If users can be writing to this page using arbitrary
- * virtual addresses, take care about potential aliasing
- * before reading the page on the kernel side.
+ * If users can be writing to this folio using arbitrary
+ * virtual addresses, take care of potential aliasing
+ * before reading the folio on the kernel side.
*/
- if (writably_mapped) {
- int j;
-
- for (j = 0; j < thp_nr_pages(page); j++)
- flush_dcache_page(page + j);
- }
+ if (writably_mapped)
+ flush_dcache_folio(folio);
- copied = copy_page_to_iter(page, offset, bytes, iter);
+ copied = copy_folio_to_iter(folio, offset, bytes, iter);
already_read += copied;
iocb->ki_pos += copied;
@@ -2705,10 +2657,10 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
break;
}
}
-put_pages:
- for (i = 0; i < pagevec_count(&pvec); i++)
- put_page(pvec.pages[i]);
- pagevec_reinit(&pvec);
+put_folios:
+ for (i = 0; i < folio_batch_count(&fbatch); i++)
+ folio_put(fbatch.folios[i]);
+ folio_batch_init(&fbatch);
} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
file_accessed(filp);
@@ -2793,44 +2745,44 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
EXPORT_SYMBOL(generic_file_read_iter);
-static inline loff_t page_seek_hole_data(struct xa_state *xas,
- struct address_space *mapping, struct page *page,
+static inline loff_t folio_seek_hole_data(struct xa_state *xas,
+ struct address_space *mapping, struct folio *folio,
loff_t start, loff_t end, bool seek_data)
{
const struct address_space_operations *ops = mapping->a_ops;
size_t offset, bsz = i_blocksize(mapping->host);
- if (xa_is_value(page) || PageUptodate(page))
+ if (xa_is_value(folio) || folio_test_uptodate(folio))
return seek_data ? start : end;
if (!ops->is_partially_uptodate)
return seek_data ? end : start;
xas_pause(xas);
rcu_read_unlock();
- lock_page(page);
- if (unlikely(page->mapping != mapping))
+ folio_lock(folio);
+ if (unlikely(folio->mapping != mapping))
goto unlock;
- offset = offset_in_thp(page, start) & ~(bsz - 1);
+ offset = offset_in_folio(folio, start) & ~(bsz - 1);
do {
- if (ops->is_partially_uptodate(page, offset, bsz) == seek_data)
+ if (ops->is_partially_uptodate(&folio->page, offset, bsz) ==
+ seek_data)
break;
start = (start + bsz) & ~(bsz - 1);
offset += bsz;
- } while (offset < thp_size(page));
+ } while (offset < folio_size(folio));
unlock:
- unlock_page(page);
+ folio_unlock(folio);
rcu_read_lock();
return start;
}
-static inline
-unsigned int seek_page_size(struct xa_state *xas, struct page *page)
+static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio)
{
- if (xa_is_value(page))
+ if (xa_is_value(folio))
return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
- return thp_size(page);
+ return folio_size(folio);
}
/**
@@ -2857,15 +2809,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
pgoff_t max = (end - 1) >> PAGE_SHIFT;
bool seek_data = (whence == SEEK_DATA);
- struct page *page;
+ struct folio *folio;
if (end <= start)
return -ENXIO;
rcu_read_lock();
- while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
+ while ((folio = find_get_entry(&xas, max, XA_PRESENT))) {
loff_t pos = (u64)xas.xa_index << PAGE_SHIFT;
- unsigned int seek_size;
+ size_t seek_size;
if (start < pos) {
if (!seek_data)
@@ -2873,9 +2825,9 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
start = pos;
}
- seek_size = seek_page_size(&xas, page);
- pos = round_up(pos + 1, seek_size);
- start = page_seek_hole_data(&xas, mapping, page, start, pos,
+ seek_size = seek_folio_size(&xas, folio);
+ pos = round_up((u64)pos + 1, seek_size);
+ start = folio_seek_hole_data(&xas, mapping, folio, start, pos,
seek_data);
if (start < pos)
goto unlock;
@@ -2883,15 +2835,15 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
break;
if (seek_size > PAGE_SIZE)
xas_set(&xas, pos >> PAGE_SHIFT);
- if (!xa_is_value(page))
- put_page(page);
+ if (!xa_is_value(folio))
+ folio_put(folio);
}
if (seek_data)
start = -ENXIO;
unlock:
rcu_read_unlock();
- if (page && !xa_is_value(page))
- put_page(page);
+ if (folio && !xa_is_value(folio))
+ folio_put(folio);
if (start > end)
return end;
return start;
@@ -2900,21 +2852,20 @@ unlock:
#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS (100)
/*
- * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
+ * lock_folio_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
* @vmf - the vm_fault for this fault.
- * @page - the page to lock.
+ * @folio - the folio to lock.
* @fpin - the pointer to the file we may pin (or is already pinned).
*
- * This works similar to lock_page_or_retry in that it can drop the mmap_lock.
- * It differs in that it actually returns the page locked if it returns 1 and 0
- * if it couldn't lock the page. If we did have to drop the mmap_lock then fpin
- * will point to the pinned file and needs to be fput()'ed at a later point.
+ * This works similar to lock_folio_or_retry in that it can drop the
+ * mmap_lock. It differs in that it actually returns the folio locked
+ * if it returns 1 and 0 if it couldn't lock the folio. If we did have
+ * to drop the mmap_lock then fpin will point to the pinned file and
+ * needs to be fput()'ed at a later point.
*/
-static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
+static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
struct file **fpin)
{
- struct folio *folio = page_folio(page);
-
if (folio_trylock(folio))
return 1;
@@ -3003,25 +2954,25 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
* was pinned if we have to drop the mmap_lock in order to do IO.
*/
static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
- struct page *page)
+ struct folio *folio)
{
struct file *file = vmf->vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
- struct address_space *mapping = file->f_mapping;
+ DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, vmf->pgoff);
struct file *fpin = NULL;
unsigned int mmap_miss;
- pgoff_t offset = vmf->pgoff;
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
return fpin;
+
mmap_miss = READ_ONCE(ra->mmap_miss);
if (mmap_miss)
WRITE_ONCE(ra->mmap_miss, --mmap_miss);
- if (PageReadahead(page)) {
+
+ if (folio_test_readahead(folio)) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_async_readahead(mapping, ra, file,
- page, offset, ra->ra_pages);
+ page_cache_async_ra(&ractl, folio, ra->ra_pages);
}
return fpin;
}
@@ -3040,7 +2991,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
* vma->vm_mm->mmap_lock must be held on entry.
*
* If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
- * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
+ * may be dropped before doing I/O or by lock_folio_maybe_drop_mmap().
*
* If our return value does not have VM_FAULT_RETRY set, the mmap_lock
* has not been released.
@@ -3056,28 +3007,27 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
struct file *fpin = NULL;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- pgoff_t offset = vmf->pgoff;
- pgoff_t max_off;
- struct page *page;
+ pgoff_t max_idx, index = vmf->pgoff;
+ struct folio *folio;
vm_fault_t ret = 0;
bool mapping_locked = false;
- max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (unlikely(offset >= max_off))
+ max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(index >= max_idx))
return VM_FAULT_SIGBUS;
/*
* Do we have something in the page cache already?
*/
- page = find_get_page(mapping, offset);
- if (likely(page)) {
+ folio = filemap_get_folio(mapping, index);
+ if (likely(folio)) {
/*
* We found the page, so try async readahead before waiting for
* the lock.
*/
if (!(vmf->flags & FAULT_FLAG_TRIED))
- fpin = do_async_mmap_readahead(vmf, page);
- if (unlikely(!PageUptodate(page))) {
+ fpin = do_async_mmap_readahead(vmf, folio);
+ if (unlikely(!folio_test_uptodate(folio))) {
filemap_invalidate_lock_shared(mapping);
mapping_locked = true;
}
@@ -3089,17 +3039,17 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
fpin = do_sync_mmap_readahead(vmf);
retry_find:
/*
- * See comment in filemap_create_page() why we need
+ * See comment in filemap_create_folio() why we need
* invalidate_lock
*/
if (!mapping_locked) {
filemap_invalidate_lock_shared(mapping);
mapping_locked = true;
}
- page = pagecache_get_page(mapping, offset,
+ folio = __filemap_get_folio(mapping, index,
FGP_CREAT|FGP_FOR_MMAP,
vmf->gfp_mask);
- if (!page) {
+ if (!folio) {
if (fpin)
goto out_retry;
filemap_invalidate_unlock_shared(mapping);
@@ -3107,22 +3057,22 @@ retry_find:
}
}
- if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
+ if (!lock_folio_maybe_drop_mmap(vmf, folio, &fpin))
goto out_retry;
/* Did it get truncated? */
- if (unlikely(compound_head(page)->mapping != mapping)) {
- unlock_page(page);
- put_page(page);
+ if (unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
+ folio_put(folio);
goto retry_find;
}
- VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+ VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
/*
* We have a locked page in the page cache, now we need to check
* that it's up-to-date. If not, it is going to be due to an error.
*/
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!folio_test_uptodate(folio))) {
/*
* The page was in cache and uptodate and now it is not.
* Strange but possible since we didn't hold the page lock all
@@ -3130,8 +3080,8 @@ retry_find:
* try again.
*/
if (!mapping_locked) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto retry_find;
}
goto page_not_uptodate;
@@ -3143,7 +3093,7 @@ retry_find:
* redo the fault.
*/
if (fpin) {
- unlock_page(page);
+ folio_unlock(folio);
goto out_retry;
}
if (mapping_locked)
@@ -3153,14 +3103,14 @@ retry_find:
* Found the page and have a reference on it.
* We must recheck i_size under page lock.
*/
- max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
- if (unlikely(offset >= max_off)) {
- unlock_page(page);
- put_page(page);
+ max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(index >= max_idx)) {
+ folio_unlock(folio);
+ folio_put(folio);
return VM_FAULT_SIGBUS;
}
- vmf->page = page;
+ vmf->page = folio_file_page(folio, index);
return ret | VM_FAULT_LOCKED;
page_not_uptodate:
@@ -3171,10 +3121,10 @@ page_not_uptodate:
* and we need to check for errors.
*/
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- error = filemap_read_page(file, mapping, page);
+ error = filemap_read_folio(file, mapping, folio);
if (fpin)
goto out_retry;
- put_page(page);
+ folio_put(folio);
if (!error || error == AOP_TRUNCATED_PAGE)
goto retry_find;
@@ -3188,8 +3138,8 @@ out_retry:
* re-find the vma and come back and find our hopefully still populated
* page.
*/
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
if (mapping_locked)
filemap_invalidate_unlock_shared(mapping);
if (fpin)
@@ -3231,50 +3181,48 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
return false;
}
-static struct page *next_uptodate_page(struct page *page,
+static struct folio *next_uptodate_page(struct folio *folio,
struct address_space *mapping,
struct xa_state *xas, pgoff_t end_pgoff)
{
unsigned long max_idx;
do {
- if (!page)
+ if (!folio)
return NULL;
- if (xas_retry(xas, page))
+ if (xas_retry(xas, folio))
continue;
- if (xa_is_value(page))
+ if (xa_is_value(folio))
continue;
- if (PageLocked(page))
+ if (folio_test_locked(folio))
continue;
- if (!page_cache_get_speculative(page))
+ if (!folio_try_get_rcu(folio))
continue;
/* Has the page moved or been split? */
- if (unlikely(page != xas_reload(xas)))
- goto skip;
- if (!PageUptodate(page) || PageReadahead(page))
+ if (unlikely(folio != xas_reload(xas)))
goto skip;
- if (PageHWPoison(page))
+ if (!folio_test_uptodate(folio) || folio_test_readahead(folio))
goto skip;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto skip;
- if (page->mapping != mapping)
+ if (folio->mapping != mapping)
goto unlock;
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
if (xas->xa_index >= max_idx)
goto unlock;
- return page;
+ return folio;
unlock:
- unlock_page(page);
+ folio_unlock(folio);
skip:
- put_page(page);
- } while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
+ folio_put(folio);
+ } while ((folio = xas_next_entry(xas, end_pgoff)) != NULL);
return NULL;
}
-static inline struct page *first_map_page(struct address_space *mapping,
+static inline struct folio *first_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
@@ -3282,7 +3230,7 @@ static inline struct page *first_map_page(struct address_space *mapping,
mapping, xas, end_pgoff);
}
-static inline struct page *next_map_page(struct address_space *mapping,
+static inline struct folio *next_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
@@ -3299,16 +3247,17 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff;
unsigned long addr;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *head, *page;
+ struct folio *folio;
+ struct page *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
vm_fault_t ret = 0;
rcu_read_lock();
- head = first_map_page(mapping, &xas, end_pgoff);
- if (!head)
+ folio = first_map_page(mapping, &xas, end_pgoff);
+ if (!folio)
goto out;
- if (filemap_map_pmd(vmf, head)) {
+ if (filemap_map_pmd(vmf, &folio->page)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3316,7 +3265,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
do {
- page = find_subpage(head, xas.xa_index);
+again:
+ page = folio_file_page(folio, xas.xa_index);
if (PageHWPoison(page))
goto unlock;
@@ -3337,12 +3287,21 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
do_set_pte(vmf, page, addr);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, addr, vmf->pte);
- unlock_page(head);
+ if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
+ xas.xa_index++;
+ folio_ref_inc(folio);
+ goto again;
+ }
+ folio_unlock(folio);
continue;
unlock:
- unlock_page(head);
- put_page(head);
- } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
+ if (folio_more_pages(folio, xas.xa_index, end_pgoff)) {
+ xas.xa_index++;
+ goto again;
+ }
+ folio_unlock(folio);
+ folio_put(folio);
+ } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL);
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
@@ -3354,24 +3313,24 @@ EXPORT_SYMBOL(filemap_map_pages);
vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
vm_fault_t ret = VM_FAULT_LOCKED;
sb_start_pagefault(mapping->host->i_sb);
file_update_time(vmf->vma->vm_file);
- lock_page(page);
- if (page->mapping != mapping) {
- unlock_page(page);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
ret = VM_FAULT_NOPAGE;
goto out;
}
/*
- * We mark the page dirty already here so that when freeze is in
+ * We mark the folio dirty already here so that when freeze is in
* progress, we are guaranteed that writeback during freezing will
- * see the dirty page and writeprotect it again.
+ * see the dirty folio and writeprotect it again.
*/
- set_page_dirty(page);
- wait_for_stable_page(page);
+ folio_mark_dirty(folio);
+ folio_wait_stable(folio);
out:
sb_end_pagefault(mapping->host->i_sb);
return ret;
@@ -3424,35 +3383,20 @@ EXPORT_SYMBOL(filemap_page_mkwrite);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);
-static struct page *wait_on_page_read(struct page *page)
+static struct folio *do_read_cache_folio(struct address_space *mapping,
+ pgoff_t index, filler_t filler, void *data, gfp_t gfp)
{
- if (!IS_ERR(page)) {
- wait_on_page_locked(page);
- if (!PageUptodate(page)) {
- put_page(page);
- page = ERR_PTR(-EIO);
- }
- }
- return page;
-}
-
-static struct page *do_read_cache_page(struct address_space *mapping,
- pgoff_t index,
- int (*filler)(void *, struct page *),
- void *data,
- gfp_t gfp)
-{
- struct page *page;
+ struct folio *folio;
int err;
repeat:
- page = find_get_page(mapping, index);
- if (!page) {
- page = __page_cache_alloc(gfp);
- if (!page)
+ folio = filemap_get_folio(mapping, index);
+ if (!folio) {
+ folio = filemap_alloc_folio(gfp, 0);
+ if (!folio)
return ERR_PTR(-ENOMEM);
- err = add_to_page_cache_lru(page, mapping, index, gfp);
+ err = filemap_add_folio(mapping, folio, index, gfp);
if (unlikely(err)) {
- put_page(page);
+ folio_put(folio);
if (err == -EEXIST)
goto repeat;
/* Presumably ENOMEM for xarray node */
@@ -3461,71 +3405,41 @@ repeat:
filler:
if (filler)
- err = filler(data, page);
+ err = filler(data, &folio->page);
else
- err = mapping->a_ops->readpage(data, page);
+ err = mapping->a_ops->readpage(data, &folio->page);
if (err < 0) {
- put_page(page);
+ folio_put(folio);
return ERR_PTR(err);
}
- page = wait_on_page_read(page);
- if (IS_ERR(page))
- return page;
+ folio_wait_locked(folio);
+ if (!folio_test_uptodate(folio)) {
+ folio_put(folio);
+ return ERR_PTR(-EIO);
+ }
+
goto out;
}
- if (PageUptodate(page))
- goto out;
-
- /*
- * Page is not up to date and may be locked due to one of the following
- * case a: Page is being filled and the page lock is held
- * case b: Read/write error clearing the page uptodate status
- * case c: Truncation in progress (page locked)
- * case d: Reclaim in progress
- *
- * Case a, the page will be up to date when the page is unlocked.
- * There is no need to serialise on the page lock here as the page
- * is pinned so the lock gives no additional protection. Even if the
- * page is truncated, the data is still valid if PageUptodate as
- * it's a race vs truncate race.
- * Case b, the page will not be up to date
- * Case c, the page may be truncated but in itself, the data may still
- * be valid after IO completes as it's a read vs truncate race. The
- * operation must restart if the page is not uptodate on unlock but
- * otherwise serialising on page lock to stabilise the mapping gives
- * no additional guarantees to the caller as the page lock is
- * released before return.
- * Case d, similar to truncation. If reclaim holds the page lock, it
- * will be a race with remove_mapping that determines if the mapping
- * is valid on unlock but otherwise the data is valid and there is
- * no need to serialise with page lock.
- *
- * As the page lock gives no additional guarantee, we optimistically
- * wait on the page to be unlocked and check if it's up to date and
- * use the page if it is. Otherwise, the page lock is required to
- * distinguish between the different cases. The motivation is that we
- * avoid spurious serialisations and wakeups when multiple processes
- * wait on the same page for IO to complete.
- */
- wait_on_page_locked(page);
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
goto out;
- /* Distinguish between all the cases under the safety of the lock */
- lock_page(page);
+ if (!folio_trylock(folio)) {
+ folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE);
+ goto repeat;
+ }
- /* Case c or d, restart the operation */
- if (!page->mapping) {
- unlock_page(page);
- put_page(page);
+ /* Folio was truncated from mapping */
+ if (!folio->mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
/* Someone else locked and filled the page in a very small window */
- if (PageUptodate(page)) {
- unlock_page(page);
+ if (folio_test_uptodate(folio)) {
+ folio_unlock(folio);
goto out;
}
@@ -3535,16 +3449,16 @@ filler:
* Clear page error before actual read, PG_error will be
* set again if read page fails.
*/
- ClearPageError(page);
+ folio_clear_error(folio);
goto filler;
out:
- mark_page_accessed(page);
- return page;
+ folio_mark_accessed(folio);
+ return folio;
}
/**
- * read_cache_page - read into page cache, fill it if needed
+ * read_cache_folio - read into page cache, fill it if needed
* @mapping: the page's address_space
* @index: the page index
* @filler: function to perform the read
@@ -3559,10 +3473,27 @@ out:
*
* Return: up to date page on success, ERR_PTR() on failure.
*/
+struct folio *read_cache_folio(struct address_space *mapping, pgoff_t index,
+ filler_t filler, void *data)
+{
+ return do_read_cache_folio(mapping, index, filler, data,
+ mapping_gfp_mask(mapping));
+}
+EXPORT_SYMBOL(read_cache_folio);
+
+static struct page *do_read_cache_page(struct address_space *mapping,
+ pgoff_t index, filler_t *filler, void *data, gfp_t gfp)
+{
+ struct folio *folio;
+
+ folio = do_read_cache_folio(mapping, index, filler, data, gfp);
+ if (IS_ERR(folio))
+ return &folio->page;
+ return folio_file_page(folio, index);
+}
+
struct page *read_cache_page(struct address_space *mapping,
- pgoff_t index,
- int (*filler)(void *, struct page *),
- void *data)
+ pgoff_t index, filler_t *filler, void *data)
{
return do_read_cache_page(mapping, index, filler, data,
mapping_gfp_mask(mapping));
@@ -3922,33 +3853,32 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
EXPORT_SYMBOL(generic_file_write_iter);
/**
- * try_to_release_page() - release old fs-specific metadata on a page
- *
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
+ * filemap_release_folio() - Release fs-specific metadata on a folio.
+ * @folio: The folio which the kernel is trying to free.
+ * @gfp: Memory allocation flags (and I/O mode).
*
- * The address_space is to try to release any data against the page
- * (presumably at page->private).
+ * The address_space is trying to release any data attached to a folio
+ * (presumably at folio->private).
*
- * This may also be called if PG_fscache is set on a page, indicating that the
- * page is known to the local caching routines.
+ * This will also be called if the private_2 flag is set on a page,
+ * indicating that the folio has other metadata associated with it.
*
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS).
+ * The @gfp argument specifies whether I/O may be performed to release
+ * this page (__GFP_IO), and whether the call may block
+ * (__GFP_RECLAIM & __GFP_FS).
*
- * Return: %1 if the release was successful, otherwise return zero.
+ * Return: %true if the release was successful, otherwise %false.
*/
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
+bool filemap_release_folio(struct folio *folio, gfp_t gfp)
{
- struct address_space * const mapping = page->mapping;
+ struct address_space * const mapping = folio->mapping;
- BUG_ON(!PageLocked(page));
- if (PageWriteback(page))
- return 0;
+ BUG_ON(!folio_test_locked(folio));
+ if (folio_test_writeback(folio))
+ return false;
if (mapping && mapping->a_ops->releasepage)
- return mapping->a_ops->releasepage(page, gfp_mask);
- return try_to_free_buffers(page);
+ return mapping->a_ops->releasepage(&folio->page, gfp);
+ return try_to_free_buffers(&folio->page);
}
-
-EXPORT_SYMBOL(try_to_release_page);
+EXPORT_SYMBOL(filemap_release_folio);