Diffstat:
-rw-r--r--  mm/shmem.c  454
1 file changed, 245 insertions(+), 209 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 42e5888bf84d..c1d8b8a1aa3b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -38,6 +38,7 @@
#include <linux/hugetlb.h>
#include <linux/fs_parser.h>
#include <linux/swapfile.h>
+#include <linux/iversion.h>
#include "swap.h"
static struct vfsmount *shm_mnt;
@@ -139,17 +140,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct folio **foliop, enum sgp_type sgp,
gfp_t gfp, struct vm_area_struct *vma,
vm_fault_t *fault_type);
-static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp,
- gfp_t gfp, struct vm_area_struct *vma,
- struct vm_fault *vmf, vm_fault_t *fault_type);
-
-int shmem_getpage(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp)
-{
- return shmem_getpage_gfp(inode, index, pagep, sgp,
- mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
-}
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
@@ -190,7 +180,7 @@ static inline int shmem_reacct_size(unsigned long flags,
/*
* ... whereas tmpfs objects are accounted incrementally as
* pages are allocated, in order to allow large sparse files.
- * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
+ * shmem_get_folio reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
* so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
*/
static inline int shmem_acct_block(unsigned long flags, long pages)
@@ -472,20 +462,22 @@ static bool shmem_confirm_swap(struct address_space *mapping,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
-bool shmem_is_huge(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
+bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode,
+ pgoff_t index, bool shmem_huge_force)
{
loff_t i_size;
if (!S_ISREG(inode->i_mode))
return false;
- if (shmem_huge == SHMEM_HUGE_DENY)
- return false;
if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
return false;
+ if (shmem_huge_force)
+ return true;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;
+ if (shmem_huge == SHMEM_HUGE_DENY)
+ return false;
switch (SHMEM_SB(inode->i_sb)->huge) {
case SHMEM_HUGE_ALWAYS:
@@ -629,7 +621,7 @@ next:
goto move_back;
}
- ret = split_huge_page(&folio->page);
+ ret = split_folio(folio);
folio_unlock(folio);
folio_put(folio);
@@ -680,8 +672,8 @@ static long shmem_unused_huge_count(struct super_block *sb,
#define shmem_huge SHMEM_HUGE_DENY
-bool shmem_is_huge(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
+bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode,
+ pgoff_t index, bool shmem_huge_force)
{
return false;
}
@@ -763,23 +755,22 @@ error:
}
/*
- * Like delete_from_page_cache, but substitutes swap for page.
+ * Like delete_from_page_cache, but substitutes swap for @folio.
*/
-static void shmem_delete_from_page_cache(struct page *page, void *radswap)
+static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
+ long nr = folio_nr_pages(folio);
int error;
- VM_BUG_ON_PAGE(PageCompound(page), page);
-
xa_lock_irq(&mapping->i_pages);
- error = shmem_replace_entry(mapping, page->index, page, radswap);
- page->mapping = NULL;
- mapping->nrpages--;
- __dec_lruvec_page_state(page, NR_FILE_PAGES);
- __dec_lruvec_page_state(page, NR_SHMEM);
+ error = shmem_replace_entry(mapping, folio->index, folio, radswap);
+ folio->mapping = NULL;
+ mapping->nrpages -= nr;
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -nr);
+ __lruvec_stat_mod_folio(folio, NR_SHMEM, -nr);
xa_unlock_irq(&mapping->i_pages);
- put_page(page);
+ folio_put(folio);
BUG_ON(error);
}
@@ -886,10 +877,9 @@ void shmem_unlock_mapping(struct address_space *mapping)
static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
{
struct folio *folio;
- struct page *page;
/*
- * At first avoid shmem_getpage(,,,SGP_READ): that fails
+ * At first avoid shmem_get_folio(,,,SGP_READ): that fails
* beyond i_size, and reports fallocated pages as holes.
*/
folio = __filemap_get_folio(inode->i_mapping, index,
@@ -900,9 +890,9 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
* But read a page back from swap if any of it is within i_size
* (although in some cases this is just a waste of time).
*/
- page = NULL;
- shmem_getpage(inode, index, &page, SGP_READ);
- return page ? page_folio(page) : NULL;
+ folio = NULL;
+ shmem_get_folio(inode, index, &folio, SGP_READ);
+ return folio;
}
/*
@@ -1043,6 +1033,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
shmem_undo_range(inode, lstart, lend, false);
inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode_inc_iversion(inode);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -1069,7 +1060,7 @@ static int shmem_getattr(struct user_namespace *mnt_userns,
STATX_ATTR_NODUMP);
generic_fillattr(&init_user_ns, inode, stat);
- if (shmem_is_huge(NULL, inode, 0))
+ if (shmem_is_huge(NULL, inode, 0, false))
stat->blksize = HPAGE_PMD_SIZE;
if (request_mask & STATX_BTIME) {
@@ -1087,6 +1078,8 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
struct inode *inode = d_inode(dentry);
struct shmem_inode_info *info = SHMEM_I(inode);
int error;
+ bool update_mtime = false;
+ bool update_ctime = true;
error = setattr_prepare(&init_user_ns, dentry, attr);
if (error)
@@ -1107,7 +1100,9 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
if (error)
return error;
i_size_write(inode, newsize);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ update_mtime = true;
+ } else {
+ update_ctime = false;
}
if (newsize <= oldsize) {
loff_t holebegin = round_up(newsize, PAGE_SIZE);
@@ -1127,6 +1122,12 @@ static int shmem_setattr(struct user_namespace *mnt_userns,
setattr_copy(&init_user_ns, inode, attr);
if (attr->ia_valid & ATTR_MODE)
error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
+ if (!error && update_ctime) {
+ inode->i_ctime = current_time(inode);
+ if (update_mtime)
+ inode->i_mtime = inode->i_ctime;
+ inode_inc_iversion(inode);
+ }
return error;
}
@@ -1328,17 +1329,18 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
* "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
* and its shmem_writeback() needs them to be split when swapping.
*/
- if (PageTransCompound(page)) {
+ if (folio_test_large(folio)) {
/* Ensure the subpages are still dirty */
- SetPageDirty(page);
+ folio_test_set_dirty(folio);
if (split_huge_page(page) < 0)
goto redirty;
- ClearPageDirty(page);
+ folio = page_folio(page);
+ folio_clear_dirty(folio);
}
- BUG_ON(!PageLocked(page));
- mapping = page->mapping;
- index = page->index;
+ BUG_ON(!folio_test_locked(folio));
+ mapping = folio->mapping;
+ index = folio->index;
inode = mapping->host;
info = SHMEM_I(inode);
if (info->flags & VM_LOCKED)
@@ -1361,15 +1363,15 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
/*
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
* value into swapfile.c, the only way we can correctly account for a
- * fallocated page arriving here is now to initialize it and write it.
+ * fallocated folio arriving here is now to initialize it and write it.
*
- * That's okay for a page already fallocated earlier, but if we have
+ * That's okay for a folio already fallocated earlier, but if we have
* not yet completed the fallocation, then (a) we want to keep track
- * of this page in case we have to undo it, and (b) it may not be a
+ * of this folio in case we have to undo it, and (b) it may not be a
* good idea to continue anyway, once we're pushing into swap. So
- * reactivate the page, and let shmem_fallocate() quit when too many.
+ * reactivate the folio, and let shmem_fallocate() quit when too many.
*/
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
if (inode->i_private) {
struct shmem_falloc *shmem_falloc;
spin_lock(&inode->i_lock);
@@ -1385,9 +1387,9 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (shmem_falloc)
goto redirty;
}
- clear_highpage(page);
- flush_dcache_page(page);
- SetPageUptodate(page);
+ folio_zero_range(folio, 0, folio_size(folio));
+ flush_dcache_folio(folio);
+ folio_mark_uptodate(folio);
}
swap = folio_alloc_swap(folio);
@@ -1396,7 +1398,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
- * if it's not already there. Do it now before the page is
+ * if it's not already there. Do it now before the folio is
* moved to swap cache, when its pagelock no longer protects
* the inode from eviction. But don't unlock the mutex until
* we've incremented swapped, because shmem_unuse_inode() will
@@ -1406,7 +1408,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (list_empty(&info->swaplist))
list_add(&info->swaplist, &shmem_swaplist);
- if (add_to_swap_cache(page, swap,
+ if (add_to_swap_cache(folio, swap,
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
NULL) == 0) {
spin_lock_irq(&info->lock);
@@ -1415,21 +1417,21 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
spin_unlock_irq(&info->lock);
swap_shmem_alloc(swap);
- shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+ shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
mutex_unlock(&shmem_swaplist_mutex);
- BUG_ON(page_mapped(page));
- swap_writepage(page, wbc);
+ BUG_ON(folio_mapped(folio));
+ swap_writepage(&folio->page, wbc);
return 0;
}
mutex_unlock(&shmem_swaplist_mutex);
- put_swap_page(page, swap);
+ put_swap_folio(folio, swap);
redirty:
- set_page_dirty(page);
+ folio_mark_dirty(folio);
if (wbc->for_reclaim)
- return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */
- unlock_page(page);
+ return AOP_WRITEPAGE_ACTIVATE; /* Return with folio locked */
+ folio_unlock(folio);
return 0;
}
@@ -1486,7 +1488,7 @@ static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
mpol_cond_put(vma->vm_policy);
}
-static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
+static struct folio *shmem_swapin(swp_entry_t swap, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
struct vm_area_struct pvma;
@@ -1499,7 +1501,9 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
page = swap_cluster_readahead(swap, gfp, &vmf);
shmem_pseudo_vma_destroy(&pvma);
- return page;
+ if (!page)
+ return NULL;
+ return page_folio(page);
}
/*
@@ -1560,12 +1564,6 @@ static struct folio *shmem_alloc_folio(gfp_t gfp,
return folio;
}
-static struct page *shmem_alloc_page(gfp_t gfp,
- struct shmem_inode_info *info, pgoff_t index)
-{
- return &shmem_alloc_folio(gfp, info, index)->page;
-}
-
static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
pgoff_t index, bool huge)
{
@@ -1599,7 +1597,7 @@ failed:
/*
* When a page is moved from swapcache to shmem filecache (either by the
- * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
+ * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of
* shmem_unuse_inode()), it may have been read in earlier from swap, in
* ignorance of the mapping it belongs to. If that mapping has special
* constraints (like the gma500 GEM driver, which requires RAM below 4GB),
@@ -1614,54 +1612,52 @@ static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp)
return folio_zonenum(folio) > gfp_zone(gfp);
}
-static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
- struct page *oldpage, *newpage;
struct folio *old, *new;
struct address_space *swap_mapping;
swp_entry_t entry;
pgoff_t swap_index;
int error;
- oldpage = *pagep;
- entry.val = page_private(oldpage);
+ old = *foliop;
+ entry = folio_swap_entry(old);
swap_index = swp_offset(entry);
- swap_mapping = page_mapping(oldpage);
+ swap_mapping = swap_address_space(entry);
/*
* We have arrived here because our zones are constrained, so don't
* limit chance of success by further cpuset and node constraints.
*/
gfp &= ~GFP_CONSTRAINT_MASK;
- newpage = shmem_alloc_page(gfp, info, index);
- if (!newpage)
+ VM_BUG_ON_FOLIO(folio_test_large(old), old);
+ new = shmem_alloc_folio(gfp, info, index);
+ if (!new)
return -ENOMEM;
- get_page(newpage);
- copy_highpage(newpage, oldpage);
- flush_dcache_page(newpage);
+ folio_get(new);
+ folio_copy(new, old);
+ flush_dcache_folio(new);
- __SetPageLocked(newpage);
- __SetPageSwapBacked(newpage);
- SetPageUptodate(newpage);
- set_page_private(newpage, entry.val);
- SetPageSwapCache(newpage);
+ __folio_set_locked(new);
+ __folio_set_swapbacked(new);
+ folio_mark_uptodate(new);
+ folio_set_swap_entry(new, entry);
+ folio_set_swapcache(new);
/*
* Our caller will very soon move newpage out of swapcache, but it's
* a nice clean interface for us to replace oldpage by newpage there.
*/
xa_lock_irq(&swap_mapping->i_pages);
- error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage);
+ error = shmem_replace_entry(swap_mapping, swap_index, old, new);
if (!error) {
- old = page_folio(oldpage);
- new = page_folio(newpage);
mem_cgroup_migrate(old, new);
- __inc_lruvec_page_state(newpage, NR_FILE_PAGES);
- __inc_lruvec_page_state(newpage, NR_SHMEM);
- __dec_lruvec_page_state(oldpage, NR_FILE_PAGES);
- __dec_lruvec_page_state(oldpage, NR_SHMEM);
+ __lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1);
+ __lruvec_stat_mod_folio(new, NR_SHMEM, 1);
+ __lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1);
+ __lruvec_stat_mod_folio(old, NR_SHMEM, -1);
}
xa_unlock_irq(&swap_mapping->i_pages);
@@ -1671,18 +1667,17 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
* both PageSwapCache and page_private after getting page lock;
* but be defensive. Reverse old to newpage for clear and free.
*/
- oldpage = newpage;
+ old = new;
} else {
- lru_cache_add(newpage);
- *pagep = newpage;
+ folio_add_lru(new);
+ *foliop = new;
}
- ClearPageSwapCache(oldpage);
- set_page_private(oldpage, 0);
+ folio_clear_swapcache(old);
+ old->private = NULL;
- unlock_page(oldpage);
- put_page(oldpage);
- put_page(oldpage);
+ folio_unlock(old);
+ folio_put_refs(old, 2);
return error;
}
@@ -1730,7 +1725,6 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
- struct page *page;
struct folio *folio = NULL;
swp_entry_t swap;
int error;
@@ -1743,8 +1737,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
return -EIO;
/* Look it up and read it in.. */
- page = lookup_swap_cache(swap, NULL, 0);
- if (!page) {
+ folio = swap_cache_get_folio(swap, NULL, 0);
+ if (!folio) {
/* Or update major stats only when swapin succeeds?? */
if (fault_type) {
*fault_type |= VM_FAULT_MAJOR;
@@ -1752,13 +1746,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
count_memcg_event_mm(charge_mm, PGMAJFAULT);
}
/* Here we actually start the io */
- page = shmem_swapin(swap, gfp, info, index);
- if (!page) {
+ folio = shmem_swapin(swap, gfp, info, index);
+ if (!folio) {
error = -ENOMEM;
goto failed;
}
}
- folio = page_folio(page);
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
@@ -1781,8 +1774,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
arch_swap_restore(swap, folio);
if (shmem_should_replace_folio(folio, gfp)) {
- error = shmem_replace_page(&page, gfp, info, index);
- folio = page_folio(page);
+ error = shmem_replace_folio(&folio, gfp, info, index);
if (error)
goto failed;
}
@@ -1822,7 +1814,7 @@ unlock:
}
/*
- * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
+ * shmem_get_folio_gfp - find page in cache, or get from swap, or allocate
*
* If we allocate a new one we do not mark it dirty. That's up to the
* vm. If we swap it in we mark it dirty since we also free the swap
@@ -1831,10 +1823,10 @@ unlock:
* vma, vmf, and fault_type are only supplied by shmem_fault:
* otherwise they are NULL.
*/
-static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp, gfp_t gfp,
- struct vm_area_struct *vma, struct vm_fault *vmf,
- vm_fault_t *fault_type)
+static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
+ struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
+ struct vm_area_struct *vma, struct vm_fault *vmf,
+ vm_fault_t *fault_type)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1874,7 +1866,7 @@ repeat:
if (error == -EEXIST)
goto repeat;
- *pagep = &folio->page;
+ *foliop = folio;
return error;
}
@@ -1884,7 +1876,7 @@ repeat:
folio_mark_accessed(folio);
if (folio_test_uptodate(folio))
goto out;
- /* fallocated page */
+ /* fallocated folio */
if (sgp != SGP_READ)
goto clear;
folio_unlock(folio);
@@ -1892,10 +1884,10 @@ repeat:
}
/*
- * SGP_READ: succeed on hole, with NULL page, letting caller zero.
- * SGP_NOALLOC: fail on hole, with NULL page, letting caller fail.
+ * SGP_READ: succeed on hole, with NULL folio, letting caller zero.
+ * SGP_NOALLOC: fail on hole, with NULL folio, letting caller fail.
*/
- *pagep = NULL;
+ *foliop = NULL;
if (sgp == SGP_READ)
return 0;
if (sgp == SGP_NOALLOC)
@@ -1910,7 +1902,7 @@ repeat:
return 0;
}
- if (!shmem_is_huge(vma, inode, index))
+ if (!shmem_is_huge(vma, inode, index, false))
goto alloc_nohuge;
huge_gfp = vma_thp_gfp_mask(vma);
@@ -1928,7 +1920,7 @@ alloc_nohuge:
if (error != -ENOSPC)
goto unlock;
/*
- * Try to reclaim some space by splitting a huge page
+ * Try to reclaim some space by splitting a large folio
* beyond i_size on the filesystem.
*/
while (retry--) {
@@ -1964,9 +1956,9 @@ alloc_nohuge:
if (folio_test_pmd_mappable(folio) &&
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
- hindex + HPAGE_PMD_NR - 1) {
+ folio_next_index(folio) - 1) {
/*
- * Part of the huge page is beyond i_size: subject
+ * Part of the large folio is beyond i_size: subject
* to shrink under memory pressure.
*/
spin_lock(&sbinfo->shrinklist_lock);
@@ -1983,14 +1975,14 @@ alloc_nohuge:
}
/*
- * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+ * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio.
*/
if (sgp == SGP_FALLOC)
sgp = SGP_WRITE;
clear:
/*
- * Let SGP_WRITE caller clear ends if write does not fill page;
- * but SGP_FALLOC on a page fallocated earlier must initialize
+ * Let SGP_WRITE caller clear ends if write does not fill folio;
+ * but SGP_FALLOC on a folio fallocated earlier must initialize
* it now, lest undo on failure cancel our earlier guarantee.
*/
if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) {
@@ -2016,7 +2008,7 @@ clear:
goto unlock;
}
out:
- *pagep = folio_page(folio, index - hindex);
+ *foliop = folio;
return 0;
/*
@@ -2046,6 +2038,13 @@ unlock:
return error;
}
+int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
+ enum sgp_type sgp)
+{
+ return shmem_get_folio_gfp(inode, index, foliop, sgp,
+ mapping_gfp_mask(inode->i_mapping), NULL, NULL, NULL);
+}
+
/*
* This is like autoremove_wake_function, but it removes the wait queue
* entry unconditionally - even if something else had already woken the
@@ -2063,6 +2062,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = file_inode(vma->vm_file);
gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+ struct folio *folio = NULL;
int err;
vm_fault_t ret = VM_FAULT_LOCKED;
@@ -2125,10 +2125,12 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
spin_unlock(&inode->i_lock);
}
- err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
+ err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE,
gfp, vma, vmf, &ret);
if (err)
return vmf_error(err);
+ if (folio)
+ vmf->page = folio_file_page(folio, vmf->pgoff);
return ret;
}
@@ -2330,7 +2332,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
inode_init_owner(&init_user_ns, inode, dir, mode);
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
- inode->i_generation = prandom_u32();
+ inode->i_generation = get_random_u32();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
@@ -2398,7 +2400,6 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
void *page_kaddr;
struct folio *folio;
- struct page *page;
int ret;
pgoff_t max_off;
@@ -2417,53 +2418,70 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (!*pagep) {
ret = -ENOMEM;
- page = shmem_alloc_page(gfp, info, pgoff);
- if (!page)
+ folio = shmem_alloc_folio(gfp, info, pgoff);
+ if (!folio)
goto out_unacct_blocks;
if (!zeropage) { /* COPY */
- page_kaddr = kmap_atomic(page);
+ page_kaddr = kmap_local_folio(folio, 0);
+ /*
+ * The read mmap_lock is held here. Despite the
+ * mmap_lock being read recursive a deadlock is still
+ * possible if a writer has taken a lock. For example:
+ *
+ * process A thread 1 takes read lock on own mmap_lock
+ * process A thread 2 calls mmap, blocks taking write lock
+ * process B thread 1 takes page fault, read lock on own mmap lock
+ * process B thread 2 calls mmap, blocks taking write lock
+ * process A thread 1 blocks taking read lock on process B
+ * process B thread 1 blocks taking read lock on process A
+ *
+ * Disable page faults to prevent potential deadlock
+ * and retry the copy outside the mmap_lock.
+ */
+ pagefault_disable();
ret = copy_from_user(page_kaddr,
(const void __user *)src_addr,
PAGE_SIZE);
- kunmap_atomic(page_kaddr);
+ pagefault_enable();
+ kunmap_local(page_kaddr);
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
- *pagep = page;
+ *pagep = &folio->page;
ret = -ENOENT;
/* don't free the page */
goto out_unacct_blocks;
}
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
} else { /* ZEROPAGE */
- clear_user_highpage(page, dst_addr);
+ clear_user_highpage(&folio->page, dst_addr);
}
} else {
- page = *pagep;
+ folio = page_folio(*pagep);
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
*pagep = NULL;
}
- VM_BUG_ON(PageLocked(page));
- VM_BUG_ON(PageSwapBacked(page));
- __SetPageLocked(page);
- __SetPageSwapBacked(page);
- __SetPageUptodate(page);
+ VM_BUG_ON(folio_test_locked(folio));
+ VM_BUG_ON(folio_test_swapbacked(folio));
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
+ __folio_mark_uptodate(folio);
ret = -EFAULT;
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(pgoff >= max_off))
goto out_release;
- folio = page_folio(page);
ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL,
gfp & GFP_RECLAIM_MASK, dst_mm);
if (ret)
goto out_release;
ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- page, true, wp_copy);
+ &folio->page, true, wp_copy);
if (ret)
goto out_delete_from_cache;
@@ -2473,13 +2491,13 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
- unlock_page(page);
+ folio_unlock(folio);
return 0;
out_delete_from_cache:
- delete_from_page_cache(page);
+ filemap_remove_folio(folio);
out_release:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out_unacct_blocks:
shmem_inode_unacct_blocks(inode, 1);
return ret;
@@ -2498,6 +2516,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t index = pos >> PAGE_SHIFT;
+ struct folio *folio;
int ret = 0;
/* i_rwsem is held by caller */
@@ -2509,14 +2528,15 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
return -EPERM;
}
- ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
+ ret = shmem_get_folio(inode, index, &folio, SGP_WRITE);
if (ret)
return ret;
+ *pagep = folio_file_page(folio, index);
if (PageHWPoison(*pagep)) {
- unlock_page(*pagep);
- put_page(*pagep);
+ folio_unlock(folio);
+ folio_put(folio);
*pagep = NULL;
return -EIO;
}
@@ -2575,6 +2595,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
offset = *ppos & ~PAGE_MASK;
for (;;) {
+ struct folio *folio = NULL;
struct page *page = NULL;
pgoff_t end_index;
unsigned long nr, ret;
@@ -2589,17 +2610,18 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
break;
}
- error = shmem_getpage(inode, index, &page, SGP_READ);
+ error = shmem_get_folio(inode, index, &folio, SGP_READ);
if (error) {
if (error == -EINVAL)
error = 0;
break;
}
- if (page) {
- unlock_page(page);
+ if (folio) {
+ folio_unlock(folio);
+ page = folio_file_page(folio, index);
if (PageHWPoison(page)) {
- put_page(page);
+ folio_put(folio);
error = -EIO;
break;
}
@@ -2615,14 +2637,14 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (index == end_index) {
nr = i_size & ~PAGE_MASK;
if (nr <= offset) {
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
break;
}
}
nr -= offset;
- if (page) {
+ if (folio) {
/*
* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing
@@ -2634,13 +2656,13 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
* Mark the page accessed if we read the beginning.
*/
if (!offset)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
/*
* Ok, we have the page, and it's up-to-date, so
* now we can copy it to user space...
*/
ret = copy_page_to_iter(page, offset, nr, to);
- put_page(page);
+ folio_put(folio);
} else if (user_backed_iter(to)) {
/*
@@ -2783,7 +2805,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
info->fallocend = end;
for (index = start; index < end; ) {
- struct page *page;
+ struct folio *folio;
/*
* Good, the fallocate(2) manpage permits EINTR: we may have
@@ -2794,10 +2816,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM;
else
- error = shmem_getpage(inode, index, &page, SGP_FALLOC);
+ error = shmem_get_folio(inode, index, &folio,
+ SGP_FALLOC);
if (error) {
info->fallocend = undo_fallocend;
- /* Remove the !PageUptodate pages we added */
+ /* Remove the !uptodate folios we added */
if (index > start) {
shmem_undo_range(inode,
(loff_t)start << PAGE_SHIFT,
@@ -2806,37 +2829,34 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
goto undone;
}
- index++;
/*
* Here is a more important optimization than it appears:
- * a second SGP_FALLOC on the same huge page will clear it,
- * making it PageUptodate and un-undoable if we fail later.
+ * a second SGP_FALLOC on the same large folio will clear it,
+ * making it uptodate and un-undoable if we fail later.
*/
- if (PageTransCompound(page)) {
- index = round_up(index, HPAGE_PMD_NR);
- /* Beware 32-bit wraparound */
- if (!index)
- index--;
- }
+ index = folio_next_index(folio);
+ /* Beware 32-bit wraparound */
+ if (!index)
+ index--;
/*
* Inform shmem_writepage() how far we have reached.
* No need for lock or barrier: we have the page lock.
*/
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
shmem_falloc.nr_falloced += index - shmem_falloc.next;
shmem_falloc.next = index;
/*
- * If !PageUptodate, leave it that way so that freeable pages
+ * If !uptodate, leave it that way so that freeable folios
* can be recognized if we need to rollback on error later.
- * But set_page_dirty so that memory pressure will swap rather
- * than free the pages we are allocating (and SGP_CACHE pages
+ * But mark it dirty so that memory pressure will swap rather
+ * than free the folios we are allocating (and SGP_CACHE folios
* might still be clean: we now need to mark those dirty too).
*/
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
cond_resched();
}
@@ -2901,6 +2921,7 @@ shmem_mknod(struct user_namespace *mnt_userns, struct inode *dir,
error = 0;
dir->i_size += BOGO_DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = current_time(dir);
+ inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
}
@@ -2912,7 +2933,7 @@ out_iput:
static int
shmem_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct file *file, umode_t mode)
{
struct inode *inode;
int error = -ENOSPC;
@@ -2927,9 +2948,9 @@ shmem_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
error = simple_acl_create(dir, inode);
if (error)
goto out_iput;
- d_tmpfile(dentry, inode);
+ d_tmpfile(file, inode);
}
- return error;
+ return finish_open_simple(file, error);
out_iput:
iput(inode);
return error;
@@ -2976,6 +2997,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ inode_inc_iversion(dir);
inc_nlink(inode);
ihold(inode); /* New dentry reference */
dget(dentry); /* Extra pinning count for the created dentry */
@@ -2993,6 +3015,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
dir->i_size -= BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ inode_inc_iversion(dir);
drop_nlink(inode);
dput(dentry); /* Undo the count from "create" - this does all the work */
return 0;
@@ -3082,6 +3105,8 @@ static int shmem_rename2(struct user_namespace *mnt_userns,
old_dir->i_ctime = old_dir->i_mtime =
new_dir->i_ctime = new_dir->i_mtime =
inode->i_ctime = current_time(old_dir);
+ inode_inc_iversion(old_dir);
+ inode_inc_iversion(new_dir);
return 0;
}
@@ -3091,7 +3116,7 @@ static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
int error;
int len;
struct inode *inode;
- struct page *page;
+ struct folio *folio;
len = strlen(symname) + 1;
if (len > PAGE_SIZE)
@@ -3119,21 +3144,22 @@ static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
inode->i_op = &shmem_short_symlink_operations;
} else {
inode_nohighmem(inode);
- error = shmem_getpage(inode, 0, &page, SGP_WRITE);
+ error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
if (error) {
iput(inode);
return error;
}
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
- memcpy(page_address(page), symname, len);
- SetPageUptodate(page);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ memcpy(folio_address(folio), symname, len);
+ folio_mark_uptodate(folio);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
}
dir->i_size += BOGO_DIRENT_SIZE;
dir->i_ctime = dir->i_mtime = current_time(dir);
+ inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry);
return 0;
@@ -3141,40 +3167,41 @@ static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
static void shmem_put_link(void *arg)
{
- mark_page_accessed(arg);
- put_page(arg);
+ folio_mark_accessed(arg);
+ folio_put(arg);
}
static const char *shmem_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
int error;
+
if (!dentry) {
- page = find_get_page(inode->i_mapping, 0);
- if (!page)
+ folio = filemap_get_folio(inode->i_mapping, 0);
+ if (!folio)
return ERR_PTR(-ECHILD);
- if (PageHWPoison(page) ||
- !PageUptodate(page)) {
- put_page(page);
+ if (PageHWPoison(folio_page(folio, 0)) ||
+ !folio_test_uptodate(folio)) {
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
} else {
- error = shmem_getpage(inode, 0, &page, SGP_READ);
+ error = shmem_get_folio(inode, 0, &folio, SGP_READ);
if (error)
return ERR_PTR(error);
- if (!page)
+ if (!folio)
return ERR_PTR(-ECHILD);
- if (PageHWPoison(page)) {
- unlock_page(page);
- put_page(page);
+ if (PageHWPoison(folio_page(folio, 0))) {
+ folio_unlock(folio);
+ folio_put(folio);
return ERR_PTR(-ECHILD);
}
- unlock_page(page);
+ folio_unlock(folio);
}
- set_delayed_call(done, shmem_put_link, page);
- return page_address(page);
+ set_delayed_call(done, shmem_put_link, folio);
+ return folio_address(folio);
}
#ifdef CONFIG_TMPFS_XATTR
@@ -3204,6 +3231,7 @@ static int shmem_fileattr_set(struct user_namespace *mnt_userns,
shmem_set_inode_flags(inode, info->fsflags);
inode->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
return 0;
}
@@ -3267,9 +3295,15 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ int err;
name = xattr_full_name(handler, name);
- return simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
+ err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
+ if (!err) {
+ inode->i_ctime = current_time(inode);
+ inode_inc_iversion(inode);
+ }
+ return err;
}
static const struct xattr_handler shmem_security_xattr_handler = {
@@ -3732,7 +3766,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOUSER;
}
sb->s_export_op = &shmem_export_ops;
- sb->s_flags |= SB_NOSEC;
+ sb->s_flags |= SB_NOSEC | SB_I_VERSION;
#else
sb->s_flags |= SB_NOUSER;
#endif
@@ -4266,18 +4300,20 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
{
#ifdef CONFIG_SHMEM
struct inode *inode = mapping->host;
+ struct folio *folio;
struct page *page;
int error;
BUG_ON(!shmem_mapping(mapping));
- error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+ error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
gfp, NULL, NULL, NULL);
if (error)
return ERR_PTR(error);
- unlock_page(page);
+ folio_unlock(folio);
+ page = folio_file_page(folio, index);
if (PageHWPoison(page)) {
- put_page(page);
+ folio_put(folio);
return ERR_PTR(-EIO);
}