Diffstat (limited to 'mm/shmem.c')
-rw-r--r--  mm/shmem.c  560
1 files changed, 496 insertions, 64 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index f99ff3e50bd6..a15a466d0d1d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -53,6 +53,7 @@ static struct vfsmount *shm_mnt;
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/percpu_counter.h>
+#include <linux/falloc.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/swapops.h>
@@ -83,12 +84,25 @@ struct shmem_xattr {
char value[0];
};
+/*
+ * shmem_fallocate and shmem_writepage communicate via inode->i_private
+ * (with i_mutex making sure that it has only one user at a time):
+ * we would prefer not to enlarge the shmem inode just for that.
+ */
+struct shmem_falloc {
+ pgoff_t start; /* start of range currently being fallocated */
+ pgoff_t next; /* the next page offset to be fallocated */
+ pgoff_t nr_falloced; /* how many new pages have been fallocated */
+ pgoff_t nr_unswapped; /* how often writepage refused to swap out */
+};
+
/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
- SGP_WRITE, /* may exceed i_size, may allocate page */
+ SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
+ SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};
#ifdef CONFIG_TMPFS
@@ -103,6 +117,9 @@ static unsigned long shmem_default_max_inodes(void)
}
#endif
+static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
+static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+ struct shmem_inode_info *info, pgoff_t index);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
@@ -423,27 +440,31 @@ void shmem_unlock_mapping(struct address_space *mapping)
/*
* Remove range of pages and swap entries from radix tree, and free them.
+ * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
*/
-void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
+ bool unfalloc)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
- pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
+ pgoff_t end = (lend + 1) >> PAGE_CACHE_SHIFT;
+ unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
+ unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
struct pagevec pvec;
pgoff_t indices[PAGEVEC_SIZE];
long nr_swaps_freed = 0;
pgoff_t index;
int i;
- BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
+ if (lend == -1)
+ end = -1; /* unsigned, so actually very big */
pagevec_init(&pvec, 0);
index = start;
- while (index <= end) {
+ while (index < end) {
pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE),
pvec.pages, indices);
if (!pvec.nr)
break;
@@ -452,10 +473,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
struct page *page = pvec.pages[i];
index = indices[i];
- if (index > end)
+ if (index >= end)
break;
if (radix_tree_exceptional_entry(page)) {
+ if (unfalloc)
+ continue;
nr_swaps_freed += !shmem_free_swap(mapping,
index, page);
continue;
@@ -463,9 +486,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
if (!trylock_page(page))
continue;
- if (page->mapping == mapping) {
- VM_BUG_ON(PageWriteback(page));
- truncate_inode_page(mapping, page);
+ if (!unfalloc || !PageUptodate(page)) {
+ if (page->mapping == mapping) {
+ VM_BUG_ON(PageWriteback(page));
+ truncate_inode_page(mapping, page);
+ }
}
unlock_page(page);
}
@@ -476,30 +501,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
index++;
}
- if (partial) {
+ if (partial_start) {
struct page *page = NULL;
shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
if (page) {
- zero_user_segment(page, partial, PAGE_CACHE_SIZE);
+ unsigned int top = PAGE_CACHE_SIZE;
+ if (start > end) {
+ top = partial_end;
+ partial_end = 0;
+ }
+ zero_user_segment(page, partial_start, top);
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ }
+ if (partial_end) {
+ struct page *page = NULL;
+ shmem_getpage(inode, end, &page, SGP_READ, NULL);
+ if (page) {
+ zero_user_segment(page, 0, partial_end);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
}
}
+ if (start >= end)
+ return;
index = start;
for ( ; ; ) {
cond_resched();
pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE),
pvec.pages, indices);
if (!pvec.nr) {
- if (index == start)
+ if (index == start || unfalloc)
break;
index = start;
continue;
}
- if (index == start && indices[0] > end) {
+ if ((index == start || unfalloc) && indices[0] >= end) {
shmem_deswap_pagevec(&pvec);
pagevec_release(&pvec);
break;
@@ -509,19 +551,23 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
struct page *page = pvec.pages[i];
index = indices[i];
- if (index > end)
+ if (index >= end)
break;
if (radix_tree_exceptional_entry(page)) {
+ if (unfalloc)
+ continue;
nr_swaps_freed += !shmem_free_swap(mapping,
index, page);
continue;
}
lock_page(page);
- if (page->mapping == mapping) {
- VM_BUG_ON(PageWriteback(page));
- truncate_inode_page(mapping, page);
+ if (!unfalloc || !PageUptodate(page)) {
+ if (page->mapping == mapping) {
+ VM_BUG_ON(PageWriteback(page));
+ truncate_inode_page(mapping, page);
+ }
}
unlock_page(page);
}
@@ -535,7 +581,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
info->swapped -= nr_swaps_freed;
shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+}
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+ shmem_undo_range(inode, lstart, lend, false);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
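To make the start/end/partial_start/partial_end arithmetic in shmem_undo_range() above concrete, here is a small userspace sketch of the same shifts and masks, assuming 4096-byte pages (PAGE_CACHE_SHIFT == 12) and an arbitrary hole punch covering bytes 1000 through 8999 inclusive; the numbers are illustrative only and not part of the patch.

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical hole punch over bytes lstart..lend inclusive. */
		unsigned long long lstart = 1000, lend = 8999;

		unsigned long long start = (lstart + 4095) >> 12;  /* 1: first page wholly inside the hole */
		unsigned long long end = (lend + 1) >> 12;         /* 2: first page past the whole pages */
		unsigned int partial_start = lstart & 4095;        /* 1000: zero page 0 from here to its end */
		unsigned int partial_end = (lend + 1) & 4095;      /* 808: zero page 2 from 0 up to here */

		printf("remove whole pages [%llu,%llu); zero page %llu from %u, page %llu up to %u\n",
		       start, end, start - 1, partial_start, end, partial_end);
		return 0;
	}

When the punched range lies within a single page (so start > end), the code above instead caps the first zeroing at partial_end and skips both the second zeroing and the whole-page passes.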
@@ -597,19 +647,20 @@ static void shmem_evict_inode(struct inode *inode)
}
BUG_ON(inode->i_blocks);
shmem_free_inode(inode->i_sb);
- end_writeback(inode);
+ clear_inode(inode);
}
/*
* If swap found in inode, free it and move page from swapcache to filecache.
*/
static int shmem_unuse_inode(struct shmem_inode_info *info,
- swp_entry_t swap, struct page *page)
+ swp_entry_t swap, struct page **pagep)
{
struct address_space *mapping = info->vfs_inode.i_mapping;
void *radswap;
pgoff_t index;
- int error;
+ gfp_t gfp;
+ int error = 0;
radswap = swp_to_radix_entry(swap);
index = radix_tree_locate_item(&mapping->page_tree, radswap);
@@ -625,22 +676,48 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
if (shmem_swaplist.next != &info->swaplist)
list_move_tail(&shmem_swaplist, &info->swaplist);
+ gfp = mapping_gfp_mask(mapping);
+ if (shmem_should_replace_page(*pagep, gfp)) {
+ mutex_unlock(&shmem_swaplist_mutex);
+ error = shmem_replace_page(pagep, gfp, info, index);
+ mutex_lock(&shmem_swaplist_mutex);
+ /*
+ * We needed to drop mutex to make that restrictive page
+ * allocation, but the inode might have been freed while we
+ * dropped it: although a racing shmem_evict_inode() cannot
+ * complete without emptying the radix_tree, our page lock
+ * on this swapcache page is not enough to prevent that -
+ * free_swap_and_cache() of our swap entry will only
+ * trylock_page(), removing swap from radix_tree whatever.
+ *
+ * We must not proceed to shmem_add_to_page_cache() if the
+ * inode has been freed, but of course we cannot rely on
+ * inode or mapping or info to check that. However, we can
+ * safely check if our swap entry is still in use (and here
+ * it can't have got reused for another page): if it's still
+ * in use, then the inode cannot have been freed yet, and we
+ * can safely proceed (if it's no longer in use, that tells
+ * nothing about the inode, but we don't need to unuse swap).
+ */
+ if (!page_swapcount(*pagep))
+ error = -ENOENT;
+ }
+
/*
* We rely on shmem_swaplist_mutex, not only to protect the swaplist,
* but also to hold up shmem_evict_inode(): so inode cannot be freed
* beneath us (pagelock doesn't help until the page is in pagecache).
*/
- error = shmem_add_to_page_cache(page, mapping, index,
+ if (!error)
+ error = shmem_add_to_page_cache(*pagep, mapping, index,
GFP_NOWAIT, radswap);
- /* which does mem_cgroup_uncharge_cache_page on error */
-
if (error != -ENOMEM) {
/*
* Truncation and eviction use free_swap_and_cache(), which
* only does trylock page: if we raced, best clean up here.
*/
- delete_from_swap_cache(page);
- set_page_dirty(page);
+ delete_from_swap_cache(*pagep);
+ set_page_dirty(*pagep);
if (!error) {
spin_lock(&info->lock);
info->swapped--;
@@ -660,7 +737,14 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
struct list_head *this, *next;
struct shmem_inode_info *info;
int found = 0;
- int error;
+ int error = 0;
+
+ /*
+ * There's a faint possibility that swap page was replaced before
+ * caller locked it: caller will come back later with the right page.
+ */
+ if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
+ goto out;
/*
* Charge page using GFP_KERNEL while we can wait, before taking
@@ -676,7 +760,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
list_for_each_safe(this, next, &shmem_swaplist) {
info = list_entry(this, struct shmem_inode_info, swaplist);
if (info->swapped)
- found = shmem_unuse_inode(info, swap, page);
+ found = shmem_unuse_inode(info, swap, &page);
else
list_del_init(&info->swaplist);
cond_resched();
@@ -685,8 +769,6 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
}
mutex_unlock(&shmem_swaplist_mutex);
- if (!found)
- mem_cgroup_uncharge_cache_page(page);
if (found < 0)
error = found;
out:
@@ -727,6 +809,38 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
goto redirty;
}
+
+ /*
+ * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
+ * value into swapfile.c, the only way we can correctly account for a
+ * fallocated page arriving here is now to initialize it and write it.
+ *
+ * That's okay for a page already fallocated earlier, but if we have
+ * not yet completed the fallocation, then (a) we want to keep track
+ * of this page in case we have to undo it, and (b) it may not be a
+ * good idea to continue anyway, once we're pushing into swap. So
+ * reactivate the page, and let shmem_fallocate() quit when too many.
+ */
+ if (!PageUptodate(page)) {
+ if (inode->i_private) {
+ struct shmem_falloc *shmem_falloc;
+ spin_lock(&inode->i_lock);
+ shmem_falloc = inode->i_private;
+ if (shmem_falloc &&
+ index >= shmem_falloc->start &&
+ index < shmem_falloc->next)
+ shmem_falloc->nr_unswapped++;
+ else
+ shmem_falloc = NULL;
+ spin_unlock(&inode->i_lock);
+ if (shmem_falloc)
+ goto redirty;
+ }
+ clear_highpage(page);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ }
+
swap = get_swap_page();
if (!swap.val)
goto redirty;
@@ -856,6 +970,89 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
#endif
/*
+ * When a page is moved from swapcache to shmem filecache (either by the
+ * usual swapin of shmem_getpage_gfp(), or by the less common swapoff of
+ * shmem_unuse_inode()), it may have been read in earlier from swap, in
+ * ignorance of the mapping it belongs to. If that mapping has special
+ * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
+ * we may need to copy to a suitable page before moving to filecache.
+ *
+ * In a future release, this may well be extended to respect cpuset and
+ * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
+ * but for now it is a simple matter of zone.
+ */
+static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
+{
+ return page_zonenum(page) > gfp_zone(gfp);
+}
+
+static int shmem_replace_page(struct page **pagep, gfp_t gfp,
+ struct shmem_inode_info *info, pgoff_t index)
+{
+ struct page *oldpage, *newpage;
+ struct address_space *swap_mapping;
+ pgoff_t swap_index;
+ int error;
+
+ oldpage = *pagep;
+ swap_index = page_private(oldpage);
+ swap_mapping = page_mapping(oldpage);
+
+ /*
+ * We have arrived here because our zones are constrained, so don't
+ * limit chance of success by further cpuset and node constraints.
+ */
+ gfp &= ~GFP_CONSTRAINT_MASK;
+ newpage = shmem_alloc_page(gfp, info, index);
+ if (!newpage)
+ return -ENOMEM;
+
+ page_cache_get(newpage);
+ copy_highpage(newpage, oldpage);
+ flush_dcache_page(newpage);
+
+ __set_page_locked(newpage);
+ SetPageUptodate(newpage);
+ SetPageSwapBacked(newpage);
+ set_page_private(newpage, swap_index);
+ SetPageSwapCache(newpage);
+
+ /*
+ * Our caller will very soon move newpage out of swapcache, but it's
+ * a nice clean interface for us to replace oldpage by newpage there.
+ */
+ spin_lock_irq(&swap_mapping->tree_lock);
+ error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
+ newpage);
+ if (!error) {
+ __inc_zone_page_state(newpage, NR_FILE_PAGES);
+ __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+ }
+ spin_unlock_irq(&swap_mapping->tree_lock);
+
+ if (unlikely(error)) {
+ /*
+ * Is this possible? I think not, now that our callers check
+ * both PageSwapCache and page_private after getting page lock;
+ * but be defensive. Reverse old to newpage for clear and free.
+ */
+ oldpage = newpage;
+ } else {
+ mem_cgroup_replace_page_cache(oldpage, newpage);
+ lru_cache_add_anon(newpage);
+ *pagep = newpage;
+ }
+
+ ClearPageSwapCache(oldpage);
+ set_page_private(oldpage, 0);
+
+ unlock_page(oldpage);
+ page_cache_release(oldpage);
+ page_cache_release(oldpage);
+ return error;
+}
+
+/*
* shmem_getpage_gfp - find page in cache, or get from swap, or allocate
*
* If we allocate a new one we do not mark it dirty. That's up to the
@@ -872,6 +1069,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
swp_entry_t swap;
int error;
int once = 0;
+ int alloced = 0;
if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
return -EFBIG;
@@ -883,19 +1081,21 @@ repeat:
page = NULL;
}
- if (sgp != SGP_WRITE &&
+ if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
goto failed;
}
+ /* fallocated page? */
+ if (page && !PageUptodate(page)) {
+ if (sgp != SGP_READ)
+ goto clear;
+ unlock_page(page);
+ page_cache_release(page);
+ page = NULL;
+ }
if (page || (sgp == SGP_READ && !swap.val)) {
- /*
- * Once we can get the page lock, it must be uptodate:
- * if there were an error in reading back from swap,
- * the page would not be inserted into the filecache.
- */
- BUG_ON(page && !PageUptodate(page));
*pagep = page;
return 0;
}
@@ -923,19 +1123,21 @@ repeat:
/* We have to do this with page locked to prevent races */
lock_page(page);
+ if (!PageSwapCache(page) || page_private(page) != swap.val ||
+ page->mapping) {
+ error = -EEXIST; /* try again */
+ goto failed;
+ }
if (!PageUptodate(page)) {
error = -EIO;
goto failed;
}
wait_on_page_writeback(page);
- /* Someone may have already done it for us */
- if (page->mapping) {
- if (page->mapping == mapping &&
- page->index == index)
- goto done;
- error = -EEXIST;
- goto failed;
+ if (shmem_should_replace_page(page, gfp)) {
+ error = shmem_replace_page(&page, gfp, info, index);
+ if (error)
+ goto failed;
}
error = mem_cgroup_cache_charge(page, current->mm,
@@ -991,19 +1193,36 @@ repeat:
inode->i_blocks += BLOCKS_PER_PAGE;
shmem_recalc_inode(inode);
spin_unlock(&info->lock);
+ alloced = true;
- clear_highpage(page);
- flush_dcache_page(page);
- SetPageUptodate(page);
+ /*
+ * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+ */
+ if (sgp == SGP_FALLOC)
+ sgp = SGP_WRITE;
+clear:
+ /*
+ * Let SGP_WRITE caller clear ends if write does not fill page;
+ * but SGP_FALLOC on a page fallocated earlier must initialize
+ * it now, lest undo on failure cancel our earlier guarantee.
+ */
+ if (sgp != SGP_WRITE) {
+ clear_highpage(page);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ }
if (sgp == SGP_DIRTY)
set_page_dirty(page);
}
-done:
+
/* Perhaps the file has been truncated since we checked */
- if (sgp != SGP_WRITE &&
+ if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
- goto trunc;
+ if (alloced)
+ goto trunc;
+ else
+ goto failed;
}
*pagep = page;
return 0;
@@ -1012,6 +1231,7 @@ done:
* Error recovery.
*/
trunc:
+ info = SHMEM_I(inode);
ClearPageDirty(page);
delete_from_page_cache(page);
spin_lock(&info->lock);
@@ -1019,6 +1239,7 @@ trunc:
inode->i_blocks -= BLOCKS_PER_PAGE;
spin_unlock(&info->lock);
decused:
+ sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks)
percpu_counter_add(&sbinfo->used_blocks, -1);
unacct:
@@ -1204,6 +1425,14 @@ shmem_write_end(struct file *file, struct address_space *mapping,
if (pos + copied > inode->i_size)
i_size_write(inode, pos + copied);
+ if (!PageUptodate(page)) {
+ if (copied < PAGE_CACHE_SIZE) {
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ zero_user_segments(page, 0, from,
+ from + copied, PAGE_CACHE_SIZE);
+ }
+ SetPageUptodate(page);
+ }
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
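For a page that is not yet Uptodate (for instance one fallocated earlier, or newly created by a write starting beyond the old end of file), shmem_write_end() must zero whatever the copy did not fill before setting PageUptodate. A rough userspace illustration of the arithmetic, assuming 4096-byte pages and an arbitrary 100-byte write at position 8202, follows.

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical short write: 100 bytes land at file position 8202. */
		unsigned long long pos = 8202;
		unsigned int copied = 100;
		unsigned int from = pos & 4095;  /* 10: offset of the new data within its page */

		/* zero_user_segments(page, 0, from, from + copied, 4096) clears the
		 * bytes on either side of the copied data before SetPageUptodate(). */
		printf("zeroed [0,%u) and [%u,4096); kept [%u,%u)\n",
		       from, from + copied, from, from + copied);
		return 0;
	}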
@@ -1462,6 +1691,199 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
return error;
}
+/*
+ * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
+ */
+static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
+ pgoff_t index, pgoff_t end, int origin)
+{
+ struct page *page;
+ struct pagevec pvec;
+ pgoff_t indices[PAGEVEC_SIZE];
+ bool done = false;
+ int i;
+
+ pagevec_init(&pvec, 0);
+ pvec.nr = 1; /* start small: we may be there already */
+ while (!done) {
+ pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+ pvec.nr, pvec.pages, indices);
+ if (!pvec.nr) {
+ if (origin == SEEK_DATA)
+ index = end;
+ break;
+ }
+ for (i = 0; i < pvec.nr; i++, index++) {
+ if (index < indices[i]) {
+ if (origin == SEEK_HOLE) {
+ done = true;
+ break;
+ }
+ index = indices[i];
+ }
+ page = pvec.pages[i];
+ if (page && !radix_tree_exceptional_entry(page)) {
+ if (!PageUptodate(page))
+ page = NULL;
+ }
+ if (index >= end ||
+ (page && origin == SEEK_DATA) ||
+ (!page && origin == SEEK_HOLE)) {
+ done = true;
+ break;
+ }
+ }
+ shmem_deswap_pagevec(&pvec);
+ pagevec_release(&pvec);
+ pvec.nr = PAGEVEC_SIZE;
+ cond_resched();
+ }
+ return index;
+}
+
+static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct address_space *mapping;
+ struct inode *inode;
+ pgoff_t start, end;
+ loff_t new_offset;
+
+ if (origin != SEEK_DATA && origin != SEEK_HOLE)
+ return generic_file_llseek_size(file, offset, origin,
+ MAX_LFS_FILESIZE);
+ mapping = file->f_mapping;
+ inode = mapping->host;
+ mutex_lock(&inode->i_mutex);
+ /* We're holding i_mutex so we can access i_size directly */
+
+ if (offset < 0)
+ offset = -EINVAL;
+ else if (offset >= inode->i_size)
+ offset = -ENXIO;
+ else {
+ start = offset >> PAGE_CACHE_SHIFT;
+ end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ new_offset = shmem_seek_hole_data(mapping, start, end, origin);
+ new_offset <<= PAGE_CACHE_SHIFT;
+ if (new_offset > offset) {
+ if (new_offset < inode->i_size)
+ offset = new_offset;
+ else if (origin == SEEK_DATA)
+ offset = -ENXIO;
+ else
+ offset = inode->i_size;
+ }
+ }
+
+ if (offset >= 0 && offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ mutex_unlock(&inode->i_mutex);
+ return offset;
+}
+
+static long shmem_fallocate(struct file *file, int mode, loff_t offset,
+ loff_t len)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct shmem_falloc shmem_falloc;
+ pgoff_t start, index, end;
+ int error;
+
+ mutex_lock(&inode->i_mutex);
+
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ struct address_space *mapping = file->f_mapping;
+ loff_t unmap_start = round_up(offset, PAGE_SIZE);
+ loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+
+ if ((u64)unmap_end > (u64)unmap_start)
+ unmap_mapping_range(mapping, unmap_start,
+ 1 + unmap_end - unmap_start, 0);
+ shmem_truncate_range(inode, offset, offset + len - 1);
+ /* No need to unmap again: hole-punching leaves COWed pages */
+ error = 0;
+ goto out;
+ }
+
+ /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
+ error = inode_newsize_ok(inode, offset + len);
+ if (error)
+ goto out;
+
+ start = offset >> PAGE_CACHE_SHIFT;
+ end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ /* Try to avoid a swapstorm if len is impossible to satisfy */
+ if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
+ error = -ENOSPC;
+ goto out;
+ }
+
+ shmem_falloc.start = start;
+ shmem_falloc.next = start;
+ shmem_falloc.nr_falloced = 0;
+ shmem_falloc.nr_unswapped = 0;
+ spin_lock(&inode->i_lock);
+ inode->i_private = &shmem_falloc;
+ spin_unlock(&inode->i_lock);
+
+ for (index = start; index < end; index++) {
+ struct page *page;
+
+ /*
+ * Good, the fallocate(2) manpage permits EINTR: we may have
+ * been interrupted because we are using up too much memory.
+ */
+ if (signal_pending(current))
+ error = -EINTR;
+ else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
+ error = -ENOMEM;
+ else
+ error = shmem_getpage(inode, index, &page, SGP_FALLOC,
+ NULL);
+ if (error) {
+ /* Remove the !PageUptodate pages we added */
+ shmem_undo_range(inode,
+ (loff_t)start << PAGE_CACHE_SHIFT,
+ (loff_t)index << PAGE_CACHE_SHIFT, true);
+ goto undone;
+ }
+
+ /*
+ * Inform shmem_writepage() how far we have reached.
+ * No need for lock or barrier: we have the page lock.
+ */
+ shmem_falloc.next++;
+ if (!PageUptodate(page))
+ shmem_falloc.nr_falloced++;
+
+ /*
+ * If !PageUptodate, leave it that way so that freeable pages
+ * can be recognized if we need to rollback on error later.
+ * But set_page_dirty so that memory pressure will swap rather
+ * than free the pages we are allocating (and SGP_CACHE pages
+ * might still be clean: we now need to mark those dirty too).
+ */
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+ cond_resched();
+ }
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
+ i_size_write(inode, offset + len);
+ inode->i_ctime = CURRENT_TIME;
+undone:
+ spin_lock(&inode->i_lock);
+ inode->i_private = NULL;
+ spin_unlock(&inode->i_lock);
+out:
+ mutex_unlock(&inode->i_mutex);
+ return error;
+}
+
static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
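With shmem_fallocate() and the SEEK_DATA/SEEK_HOLE llseek added above (and wired into shmem_file_operations later in this patch), tmpfs gains preallocation, hole punching and hole seeking. The following is a minimal userspace sketch exercising the two new entry points; the /dev/shm path, sizes and expected offsets are illustrative assumptions for a 4096-byte-page kernel carrying this patch, not part of the change itself.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* Hypothetical file on a tmpfs mount; path and sizes are arbitrary. */
		const char *path = "/dev/shm/shmem-falloc-demo";
		char buf[4096];
		off_t pos;
		int fd, i;

		fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* Write three pages of data so pages 0..2 are Uptodate. */
		memset(buf, 'x', sizeof(buf));
		for (i = 0; i < 3; i++)
			if (write(fd, buf, sizeof(buf)) != sizeof(buf))
				perror("write");

		/* Punch out the middle page; PUNCH_HOLE must be paired with KEEP_SIZE. */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      4096, 4096) < 0)
			perror("fallocate(PUNCH_HOLE)");

		pos = lseek(fd, 0, SEEK_HOLE);     /* expect 4096: the punched page */
		printf("SEEK_HOLE from 0    -> %lld\n", (long long)pos);

		pos = lseek(fd, 4096, SEEK_DATA);  /* expect 8192: next written page */
		printf("SEEK_DATA from 4096 -> %lld\n", (long long)pos);

		close(fd);
		unlink(path);
		return 0;
	}

Note that pages preallocated by a plain fallocate(fd, 0, offset, len) are left !Uptodate, so until they are actually written, SEEK_DATA continues to treat them as holes (see shmem_seek_hole_data() above).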
@@ -1665,6 +2087,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
kaddr = kmap_atomic(page);
memcpy(kaddr, symname, len);
kunmap_atomic(kaddr);
+ SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
@@ -2033,11 +2456,9 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
return dentry;
}
-static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
- int connectable)
+static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
+ struct inode *parent)
{
- struct inode *inode = dentry->d_inode;
-
if (*len < 3) {
*len = 3;
return 255;
@@ -2075,6 +2496,8 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
bool remount)
{
char *this_char, *value, *rest;
+ uid_t uid;
+ gid_t gid;
while (options != NULL) {
this_char = options;
@@ -2134,15 +2557,21 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
} else if (!strcmp(this_char,"uid")) {
if (remount)
continue;
- sbinfo->uid = simple_strtoul(value, &rest, 0);
+ uid = simple_strtoul(value, &rest, 0);
if (*rest)
goto bad_val;
+ sbinfo->uid = make_kuid(current_user_ns(), uid);
+ if (!uid_valid(sbinfo->uid))
+ goto bad_val;
} else if (!strcmp(this_char,"gid")) {
if (remount)
continue;
- sbinfo->gid = simple_strtoul(value, &rest, 0);
+ gid = simple_strtoul(value, &rest, 0);
if (*rest)
goto bad_val;
+ sbinfo->gid = make_kgid(current_user_ns(), gid);
+ if (!gid_valid(sbinfo->gid))
+ goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
if (mpol_parse_str(value, &sbinfo->mpol, 1))
goto bad_val;
@@ -2210,10 +2639,12 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
if (sbinfo->mode != (S_IRWXUGO | S_ISVTX))
seq_printf(seq, ",mode=%03ho", sbinfo->mode);
- if (sbinfo->uid != 0)
- seq_printf(seq, ",uid=%u", sbinfo->uid);
- if (sbinfo->gid != 0)
- seq_printf(seq, ",gid=%u", sbinfo->gid);
+ if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID))
+ seq_printf(seq, ",uid=%u",
+ from_kuid_munged(&init_user_ns, sbinfo->uid));
+ if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID))
+ seq_printf(seq, ",gid=%u",
+ from_kgid_munged(&init_user_ns, sbinfo->gid));
shmem_show_mpol(seq, sbinfo->mpol);
return 0;
}
@@ -2260,6 +2691,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
}
}
sb->s_export_op = &shmem_export_ops;
+ sb->s_flags |= MS_NOSEC;
#else
sb->s_flags |= MS_NOUSER;
#endif
@@ -2354,7 +2786,7 @@ static const struct address_space_operations shmem_aops = {
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
#ifdef CONFIG_TMPFS
- .llseek = generic_file_llseek,
+ .llseek = shmem_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = shmem_file_aio_read,
@@ -2362,12 +2794,12 @@ static const struct file_operations shmem_file_operations = {
.fsync = noop_fsync,
.splice_read = shmem_file_splice_read,
.splice_write = generic_file_splice_write,
+ .fallocate = shmem_fallocate,
#endif
};
static const struct inode_operations shmem_inode_operations = {
.setattr = shmem_setattr,
- .truncate_range = shmem_truncate_range,
#ifdef CONFIG_TMPFS_XATTR
.setxattr = shmem_setxattr,
.getxattr = shmem_getxattr,