aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/callback_xdr.c13
-rw-r--r--fs/nfs/dir.c49
-rw-r--r--fs/nfs/direct.c12
-rw-r--r--fs/nfs/export.c9
-rw-r--r--fs/nfs/file.c125
-rw-r--r--fs/nfs/filelayout/filelayout.c10
-rw-r--r--fs/nfs/fscache.c16
-rw-r--r--fs/nfs/inode.c22
-rw-r--r--fs/nfs/internal.h48
-rw-r--r--fs/nfs/namespace.c10
-rw-r--r--fs/nfs/nfs3_fs.h2
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs42proc.c3
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4proc.c14
-rw-r--r--fs/nfs/nfs4trace.h42
-rw-r--r--fs/nfs/nfstrace.h58
-rw-r--r--fs/nfs/pagelist.c218
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/pnfs.h10
-rw-r--r--fs/nfs/pnfs_nfs.c18
-rw-r--r--fs/nfs/read.c94
-rw-r--r--fs/nfs/write.c382
-rw-r--r--fs/nfs_common/grace.c1
-rw-r--r--fs/nfsd/fault_inject.c142
-rw-r--r--fs/nfsd/filecache.c150
-rw-r--r--fs/nfsd/filecache.h5
-rw-r--r--fs/nfsd/netns.h3
-rw-r--r--fs/nfsd/nfs2acl.c9
-rw-r--r--fs/nfsd/nfs3acl.c9
-rw-r--r--fs/nfsd/nfs3proc.c7
-rw-r--r--fs/nfsd/nfs4layouts.c4
-rw-r--r--fs/nfsd/nfs4proc.c210
-rw-r--r--fs/nfsd/nfs4recover.c6
-rw-r--r--fs/nfsd/nfs4state.c182
-rw-r--r--fs/nfsd/nfs4xdr.c17
-rw-r--r--fs/nfsd/nfscache.c4
-rw-r--r--fs/nfsd/nfsctl.c84
-rw-r--r--fs/nfsd/nfsd.h8
-rw-r--r--fs/nfsd/nfsfh.c44
-rw-r--r--fs/nfsd/nfsfh.h29
-rw-r--r--fs/nfsd/nfsproc.c12
-rw-r--r--fs/nfsd/nfssvc.c25
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--fs/nfsd/trace.h83
-rw-r--r--fs/nfsd/vfs.c49
-rw-r--r--fs/nfsd/vfs.h7
-rw-r--r--fs/nfsd/xdr4.h2
49 files changed, 1176 insertions, 1086 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 1ead5bd740c2..14a72224b657 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT
config NFS_V4_2_READ_PLUS
bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
depends on NFS_V4_2
- default y
+ default n
help
- Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS
- attempts to improve read performance by compressing out sparse holes
- in the file contents.
+ This is intended for developers only. The READ_PLUS operation has
+ been shown to have issues under specific conditions and should not
+ be used in production.
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d0cccddb7d08..321af81c456e 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -980,14 +980,11 @@ out_invalidcred:
}
static int
-nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+nfs_callback_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *procp = rqstp->rq_procinfo;
- svcxdr_init_decode(rqstp);
- svcxdr_init_encode(rqstp);
-
- *statp = procp->pc_func(rqstp);
+ *rqstp->rq_accept_statp = procp->pc_func(rqstp);
return 1;
}
@@ -1072,7 +1069,8 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
}
};
-static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)]);
const struct svc_version nfs4_callback_version1 = {
.vs_vers = 1,
.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
@@ -1084,7 +1082,8 @@ const struct svc_version nfs4_callback_version1 = {
.vs_need_cong_ctrl = true,
};
-static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)]);
const struct svc_version nfs4_callback_version4 = {
.vs_vers = 4,
.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ea1ceffa1d3a..a41c3ee4549c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -203,14 +203,14 @@ static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
{
struct nfs_cache_array *array;
- array = kmap_atomic(page);
+ array = kmap_local_page(page);
array->change_attr = change_attr;
array->last_cookie = last_cookie;
array->size = 0;
array->page_full = 0;
array->page_is_eof = 0;
array->cookies_are_ordered = 1;
- kunmap_atomic(array);
+ kunmap_local(array);
}
/*
@@ -221,11 +221,11 @@ static void nfs_readdir_clear_array(struct page *page)
struct nfs_cache_array *array;
unsigned int i;
- array = kmap_atomic(page);
+ array = kmap_local_page(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].name);
array->size = 0;
- kunmap_atomic(array);
+ kunmap_local(array);
}
static void nfs_readdir_free_folio(struct folio *folio)
@@ -371,14 +371,14 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
u64 change_attr)
{
- struct nfs_cache_array *array = kmap_atomic(page);
+ struct nfs_cache_array *array = kmap_local_page(page);
int ret = true;
if (array->change_attr != change_attr)
ret = false;
if (nfs_readdir_array_index_cookie(array) != last_cookie)
ret = false;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
@@ -418,9 +418,9 @@ static u64 nfs_readdir_page_last_cookie(struct page *page)
struct nfs_cache_array *array;
u64 ret;
- array = kmap_atomic(page);
+ array = kmap_local_page(page);
ret = array->last_cookie;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
@@ -429,9 +429,9 @@ static bool nfs_readdir_page_needs_filling(struct page *page)
struct nfs_cache_array *array;
bool ret;
- array = kmap_atomic(page);
+ array = kmap_local_page(page);
ret = !nfs_readdir_array_is_full(array);
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
@@ -439,9 +439,9 @@ static void nfs_readdir_page_set_eof(struct page *page)
{
struct nfs_cache_array *array;
- array = kmap_atomic(page);
+ array = kmap_local_page(page);
nfs_readdir_array_set_eof(array);
- kunmap_atomic(array);
+ kunmap_local(array);
}
static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
@@ -568,14 +568,14 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
struct nfs_cache_array *array;
int status;
- array = kmap_atomic(desc->page);
+ array = kmap_local_page(desc->page);
if (desc->dir_cookie == 0)
status = nfs_readdir_search_for_pos(array, desc);
else
status = nfs_readdir_search_for_cookie(array, desc);
- kunmap_atomic(array);
+ kunmap_local(array);
return status;
}
@@ -2296,7 +2296,7 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct iattr attr;
@@ -2325,7 +2325,7 @@ EXPORT_SYMBOL_GPL(nfs_create);
* See comments for nfs_proc_create regarding failed operations.
*/
int
-nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct iattr attr;
@@ -2352,7 +2352,7 @@ EXPORT_SYMBOL_GPL(nfs_mknod);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct iattr attr;
@@ -2524,7 +2524,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
* now have a new file handle and can instantiate an in-core NFS inode
* and move the raw page into its mapping.
*/
-int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
struct page *page;
@@ -2642,7 +2642,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
* If these conditions are met, we can drop the dentries before doing
* the rename.
*/
-int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
@@ -2957,12 +2957,14 @@ static u64 nfs_access_login_time(const struct task_struct *task,
const struct cred *cred)
{
const struct task_struct *parent;
+ const struct cred *pcred;
u64 ret;
rcu_read_lock();
for (;;) {
parent = rcu_dereference(task->real_parent);
- if (parent == task || cred_fscmp(parent->cred, cred) != 0)
+ pcred = rcu_dereference(parent->cred);
+ if (parent == task || cred_fscmp(pcred, cred) != 0)
break;
task = parent;
}
@@ -3023,6 +3025,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
* but do it without locking.
*/
struct nfs_inode *nfsi = NFS_I(inode);
+ u64 login_time = nfs_access_login_time(current, cred);
struct nfs_access_entry *cache;
int err = -ECHILD;
struct list_head *lh;
@@ -3037,6 +3040,8 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
cache = NULL;
if (cache == NULL)
goto out;
+ if ((s64)(login_time - cache->timestamp) > 0)
+ goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
*mask = cache->mask;
@@ -3257,7 +3262,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
return ret;
}
-int nfs_permission(struct user_namespace *mnt_userns,
+int nfs_permission(struct mnt_idmap *idmap,
struct inode *inode,
int mask)
{
@@ -3308,7 +3313,7 @@ out_notsup:
res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
NFS_INO_INVALID_OTHER);
if (res == 0)
- res = generic_permission(&init_user_ns, inode, mask);
+ res = generic_permission(&nop_mnt_idmap, inode, mask);
goto out;
}
EXPORT_SYMBOL_GPL(nfs_permission);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1707f46b1335..9a18c5a69ace 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -343,14 +343,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, pagevec[i],
- pgbase, req_len);
+ req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
+ pgbase, pos, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
break;
}
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
if (!nfs_pageio_add_request(&desc, req)) {
result = desc.pg_error;
nfs_release_request(req);
@@ -802,8 +800,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
- req = nfs_create_request(dreq->ctx, pagevec[i],
- pgbase, req_len);
+ req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
+ pgbase, pos, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
break;
@@ -816,8 +814,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
}
nfs_lock_request(req);
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
if (!nfs_pageio_add_request(&desc, req)) {
result = desc.pg_error;
nfs_unlock_and_release_request(req);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 01596f2d0a1e..d6a6d1ebb8fd 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -42,7 +42,7 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
dprintk("%s: max fh len %d inode %p parent %p",
__func__, *max_len, inode, parent);
- if (*max_len < len || IS_AUTOMOUNT(inode)) {
+ if (*max_len < len) {
dprintk("%s: fh len %d too small, required %d\n",
__func__, *max_len, len);
*max_len = len;
@@ -145,17 +145,10 @@ out:
return parent;
}
-static u64 nfs_fetch_iversion(struct inode *inode)
-{
- nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
- return inode_peek_iversion_raw(inode);
-}
-
const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
- .fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
EXPORT_OP_NOATOMIC_ATTR,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d8ec889a4b3f..893625eacab9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -31,6 +31,7 @@
#include <linux/swap.h>
#include <linux/uaccess.h>
+#include <linux/filelock.h>
#include "delegation.h"
#include "internal.h"
@@ -276,27 +277,28 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync);
* and that the new data won't completely replace the old data in
* that range of the file.
*/
-static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
+static bool nfs_folio_is_full_write(struct folio *folio, loff_t pos,
+ unsigned int len)
{
- unsigned int pglen = nfs_page_length(page);
- unsigned int offset = pos & (PAGE_SIZE - 1);
+ unsigned int pglen = nfs_folio_length(folio);
+ unsigned int offset = offset_in_folio(folio, pos);
unsigned int end = offset + len;
return !pglen || (end >= pglen && !offset);
}
-static bool nfs_want_read_modify_write(struct file *file, struct page *page,
- loff_t pos, unsigned int len)
+static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
+ loff_t pos, unsigned int len)
{
/*
* Up-to-date pages, those with ongoing or full-page write
* don't need read/modify/write
*/
- if (PageUptodate(page) || PagePrivate(page) ||
- nfs_full_page_write(page, pos, len))
+ if (folio_test_uptodate(folio) || folio_test_private(folio) ||
+ nfs_folio_is_full_write(folio, pos, len))
return false;
- if (pnfs_ld_read_whole_page(file->f_mapping->host))
+ if (pnfs_ld_read_whole_page(file_inode(file)))
return true;
/* Open for reading too? */
if (file->f_mode & FMODE_READ)
@@ -304,6 +306,15 @@ static bool nfs_want_read_modify_write(struct file *file, struct page *page,
return false;
}
+static struct folio *
+nfs_folio_grab_cache_write_begin(struct address_space *mapping, pgoff_t index)
+{
+ unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+
+ return __filemap_get_folio(mapping, index, fgp_flags,
+ mapping_gfp_mask(mapping));
+}
+
/*
* This does the "real" work of the write. We must allocate and lock the
* page to be sent back to the generic routine, which then copies the
@@ -313,32 +324,31 @@ static bool nfs_want_read_modify_write(struct file *file, struct page *page,
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct page **pagep,
+ void **fsdata)
{
- int ret;
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
int once_thru = 0;
+ int ret;
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
start:
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
+ folio = nfs_folio_grab_cache_write_begin(mapping, pos >> PAGE_SHIFT);
+ if (!folio)
return -ENOMEM;
- *pagep = page;
+ *pagep = &folio->page;
- ret = nfs_flush_incompatible(file, page);
+ ret = nfs_flush_incompatible(file, folio);
if (ret) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
} else if (!once_thru &&
- nfs_want_read_modify_write(file, page, pos, len)) {
+ nfs_want_read_modify_write(file, folio, pos, len)) {
once_thru = 1;
- ret = nfs_read_folio(file, page_folio(page));
- put_page(page);
+ ret = nfs_read_folio(file, folio);
+ folio_put(folio);
if (!ret)
goto start;
}
@@ -346,11 +356,12 @@ start:
}
static int nfs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
- unsigned offset = pos & (PAGE_SIZE - 1);
struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct folio *folio = page_folio(page);
+ unsigned offset = offset_in_folio(folio, pos);
int status;
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
@@ -360,26 +371,26 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
* Zero any uninitialised parts of the page, and then mark the page
* as up to date if it turns out that we're extending the file.
*/
- if (!PageUptodate(page)) {
- unsigned pglen = nfs_page_length(page);
+ if (!folio_test_uptodate(folio)) {
+ size_t fsize = folio_size(folio);
+ unsigned pglen = nfs_folio_length(folio);
unsigned end = offset + copied;
if (pglen == 0) {
- zero_user_segments(page, 0, offset,
- end, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segments(folio, 0, offset, end, fsize);
+ folio_mark_uptodate(folio);
} else if (end >= pglen) {
- zero_user_segment(page, end, PAGE_SIZE);
+ folio_zero_segment(folio, end, fsize);
if (offset == 0)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
} else
- zero_user_segment(page, pglen, PAGE_SIZE);
+ folio_zero_segment(folio, pglen, fsize);
}
- status = nfs_updatepage(file, page, offset, copied);
+ status = nfs_update_folio(file, folio, offset, copied);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (status < 0)
return status;
@@ -401,14 +412,16 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
+ struct inode *inode = folio_file_mapping(folio)->host;
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
if (offset != 0 || length < folio_size(folio))
return;
/* Cancel any unstarted writes on this page */
- nfs_wb_folio_cancel(folio->mapping->host, folio);
+ nfs_wb_folio_cancel(inode, folio);
folio_wait_fscache(folio);
+ trace_nfs_invalidate_folio(inode, folio);
}
/*
@@ -422,8 +435,13 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio);
/* If the private flag is set, then the folio is not freeable */
- if (folio_test_private(folio))
- return false;
+ if (folio_test_private(folio)) {
+ if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
+ current_is_kswapd())
+ return false;
+ if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
+ return false;
+ }
return nfs_fscache_release_folio(folio, gfp);
}
@@ -464,12 +482,15 @@ static void nfs_check_dirty_writeback(struct folio *folio,
static int nfs_launder_folio(struct folio *folio)
{
struct inode *inode = folio->mapping->host;
+ int ret;
dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
inode->i_ino, folio_pos(folio));
folio_wait_fscache(folio);
- return nfs_wb_page(inode, &folio->page);
+ ret = nfs_wb_folio(inode, folio);
+ trace_nfs_launder_folio_done(inode, folio, ret);
+ return ret;
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -546,22 +567,22 @@ const struct address_space_operations nfs_file_aops = {
*/
static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
vm_fault_t ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
+ struct folio *folio = page_folio(vmf->page);
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
- filp, filp->f_mapping->host->i_ino,
- (long long)page_offset(page));
+ filp, filp->f_mapping->host->i_ino,
+ (long long)folio_file_pos(folio));
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
- if (PageFsCache(page) &&
- wait_on_page_fscache_killable(vmf->page) < 0) {
+ if (folio_test_fscache(folio) &&
+ folio_wait_fscache_killable(folio) < 0) {
ret = VM_FAULT_RETRY;
goto out;
}
@@ -570,25 +591,25 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
- lock_page(page);
- mapping = page_file_mapping(page);
+ folio_lock(folio);
+ mapping = folio_file_mapping(folio);
if (mapping != inode->i_mapping)
goto out_unlock;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
- pagelen = nfs_page_length(page);
+ pagelen = nfs_folio_length(folio);
if (pagelen == 0)
goto out_unlock;
ret = VM_FAULT_LOCKED;
- if (nfs_flush_incompatible(filp, page) == 0 &&
- nfs_updatepage(filp, page, 0, pagelen) == 0)
+ if (nfs_flush_incompatible(filp, folio) == 0 &&
+ nfs_update_folio(filp, folio, 0, pagelen) == 0)
goto out;
ret = VM_FAULT_SIGBUS;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
out:
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index ad34a33b0737..ce8f8934bca5 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
return &fl->generic_hdr;
}
+static bool
+filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg)
+{
+ return flseg->num_fh > 1;
+}
+
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
@@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
size = pnfs_generic_pg_test(pgio, prev, req);
if (!size)
return 0;
+ else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg)))
+ return size;
/* see if req and prev are in the same stripe */
if (prev) {
@@ -854,6 +862,8 @@ fl_pnfs_update_layout(struct inode *ino,
status = filelayout_check_deviceid(lo, fl, gfp_flags);
if (status) {
+ pnfs_error_mark_layout_for_return(ino, lseg);
+ pnfs_set_lo_fail(lseg);
pnfs_put_lseg(lseg);
lseg = NULL;
}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e731c00a9fcb..ea5f2976dfab 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -245,14 +245,12 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page)
struct netfs_cache_resources cres;
struct fscache_cookie *cookie = nfs_i_fscache(inode);
struct iov_iter iter;
- struct bio_vec bvec[1];
+ struct bio_vec bvec;
int ret;
memset(&cres, 0, sizeof(cres));
- bvec[0].bv_page = page;
- bvec[0].bv_offset = 0;
- bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+ bvec_set_page(&bvec, page, PAGE_SIZE, 0);
+ iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE);
ret = fscache_begin_read_operation(&cres, cookie);
if (ret < 0)
@@ -273,16 +271,14 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page,
struct netfs_cache_resources cres;
struct fscache_cookie *cookie = nfs_i_fscache(inode);
struct iov_iter iter;
- struct bio_vec bvec[1];
+ struct bio_vec bvec;
loff_t start = page_offset(page);
size_t len = PAGE_SIZE;
int ret;
memset(&cres, 0, sizeof(cres));
- bvec[0].bv_page = page;
- bvec[0].bv_offset = 0;
- bvec[0].bv_len = PAGE_SIZE;
- iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
+ bvec_set_page(&bvec, page, PAGE_SIZE, 0);
+ iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
ret = fscache_begin_write_operation(&cres, cookie);
if (ret < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e98ee7599eeb..222a28320e1c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -606,7 +606,7 @@ EXPORT_SYMBOL_GPL(nfs_fhget);
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
int
-nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
@@ -825,10 +825,12 @@ static u32 nfs_get_valid_attrmask(struct inode *inode)
reply_mask |= STATX_UID | STATX_GID;
if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
reply_mask |= STATX_BLOCKS;
+ if (!(cache_validity & NFS_INO_INVALID_CHANGE))
+ reply_mask |= STATX_CHANGE_COOKIE;
return reply_mask;
}
-int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+int nfs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
@@ -843,7 +845,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
- STATX_INO | STATX_SIZE | STATX_BLOCKS;
+ STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME |
+ STATX_CHANGE_COOKIE;
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
if (readdirplus_enabled)
@@ -851,8 +854,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
goto out_no_revalidate;
}
- /* Flush out writes to the server in order to update c/mtime. */
- if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
+ /* Flush out writes to the server in order to update c/mtime/version. */
+ if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) &&
S_ISREG(inode->i_mode))
filemap_write_and_wait(inode->i_mapping);
@@ -872,7 +875,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
/* Is the user requesting attributes that might need revalidation? */
if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME|
STATX_MTIME|STATX_UID|STATX_GID|
- STATX_SIZE|STATX_BLOCKS)))
+ STATX_SIZE|STATX_BLOCKS|
+ STATX_CHANGE_COOKIE)))
goto out_no_revalidate;
/* Check whether the cached attributes are stale */
@@ -908,8 +912,12 @@ out_no_revalidate:
/* Only return attributes that were revalidated. */
stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
- generic_fillattr(&init_user_ns, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+ stat->change_cookie = inode_peek_iversion_raw(inode);
+ stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
+ if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED)
+ stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
if (S_ISDIR(inode->i_mode))
stat->blksize = NFS_SERVER(inode)->dtsize;
out:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ae7d4a8c728c..2a65fe2a63ab 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -384,18 +384,18 @@ extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
-int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t, bool);
-int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
umode_t);
int nfs_rmdir(struct inode *, struct dentry *);
int nfs_unlink(struct inode *, struct dentry *);
-int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *,
const char *);
int nfs_link(struct dentry *, struct inode *, struct dentry *);
-int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t,
+int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t,
dev_t);
-int nfs_rename(struct user_namespace *, struct inode *, struct dentry *,
+int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
#ifdef CONFIG_NFS_V4_2
@@ -760,17 +760,18 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
* Record the page as unstable (an extra writeback period) and mark its
* inode as dirty.
*/
-static inline
-void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo)
+static inline void nfs_folio_mark_unstable(struct folio *folio,
+ struct nfs_commit_info *cinfo)
{
- if (!cinfo->dreq) {
- struct inode *inode = page_file_mapping(page)->host;
+ if (folio && !cinfo->dreq) {
+ struct inode *inode = folio_file_mapping(folio)->host;
+ long nr = folio_nr_pages(folio);
/* This page is really still in write-back - just that the
* writeback is happening on the server now.
*/
- inc_node_page_state(page, NR_WRITEBACK);
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+ node_stat_mod_folio(folio, NR_WRITEBACK, nr);
+ wb_stat_mod(&inode_to_bdi(inode)->wb, WB_WRITEBACK, nr);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
}
@@ -795,6 +796,24 @@ unsigned int nfs_page_length(struct page *page)
}
/*
+ * Determine the number of bytes of data the page contains
+ */
+static inline size_t nfs_folio_length(struct folio *folio)
+{
+ loff_t i_size = i_size_read(folio_file_mapping(folio)->host);
+
+ if (i_size > 0) {
+ pgoff_t index = folio_index(folio) >> folio_order(folio);
+ pgoff_t end_index = (i_size - 1) >> folio_shift(folio);
+ if (index < end_index)
+ return folio_size(folio);
+ if (index == end_index)
+ return offset_in_folio(folio, i_size - 1) + 1;
+ }
+ return 0;
+}
+
+/*
* Convert a umode to a dirent->d_type
*/
static inline
@@ -807,11 +826,10 @@ unsigned char nfs_umode_to_dtype(umode_t mode)
* Determine the number of pages in an array of length 'len' and
* with a base offset of 'base'
*/
-static inline
-unsigned int nfs_page_array_len(unsigned int base, size_t len)
+static inline unsigned int nfs_page_array_len(unsigned int base, size_t len)
{
- return ((unsigned long)len + (unsigned long)base +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
}
/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index b0ef7e7ddb30..19d51ebf842c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -208,23 +208,23 @@ out_fc:
}
static int
-nfs_namespace_getattr(struct user_namespace *mnt_userns,
+nfs_namespace_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
if (NFS_FH(d_inode(path->dentry))->size != 0)
- return nfs_getattr(mnt_userns, path, stat, request_mask,
+ return nfs_getattr(idmap, path, stat, request_mask,
query_flags);
- generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
return 0;
}
static int
-nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
if (NFS_FH(d_inode(dentry))->size != 0)
- return nfs_setattr(mnt_userns, dentry, attr);
+ return nfs_setattr(idmap, dentry, attr);
return -EACCES;
}
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index df9ca56db347..4fa37dc038b5 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -12,7 +12,7 @@
*/
#ifdef CONFIG_NFS_V3_ACL
extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu);
-extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+extern int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type);
extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
struct posix_acl *dfacl);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 74d11e3c4205..1247f544a440 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -255,7 +255,7 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
}
-int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry,
+int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index ecb428512fe1..93e306bf4430 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -460,7 +460,8 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
if (err >= 0)
break;
- if (err == -ENOTSUPP &&
+ if ((err == -ENOTSUPP ||
+ err == -NFS4ERR_OFFLOAD_DENIED) &&
nfs42_files_from_same_server(src, dst)) {
err = -EOPNOTSUPP;
break;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5edd1704f735..4c9f8bd866ab 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -23,6 +23,7 @@
#define NFS4_MAX_LOOP_ON_RECOVER (10)
#include <linux/seqlock.h>
+#include <linux/filelock.h>
struct idmap;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 40d749f29ed3..22a93ae46cd7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7692,7 +7692,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7716,7 +7716,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
#define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl"
static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7739,7 +7739,7 @@ static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry)
#define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl"
static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7764,7 +7764,7 @@ static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry)
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -7815,7 +7815,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
#ifdef CONFIG_NFS_V4_2
static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
const char *key, const void *buf,
size_t buflen, int flags)
@@ -10604,7 +10604,9 @@ static void nfs4_disable_swap(struct inode *inode)
/* The state manager thread will now exit once it is
* woken.
*/
- wake_up_var(&NFS_SERVER(inode)->nfs_client->cl_state);
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+ nfs4_schedule_state_manager(clp);
}
static const struct inode_operations nfs4_dir_inode_operations = {
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 214bc56f92d2..d27919d7241d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -292,32 +292,34 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
+TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_AVAILABLE);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW);
+TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_DELAYED);
#define show_nfs4_clp_state(state) \
__print_flags(state, "|", \
- { NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \
- { NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \
- { NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \
- { NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \
- { NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \
- { NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \
- { NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \
- { NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \
- { NFS4CLNT_SERVER_SCOPE_MISMATCH, \
- "SERVER_SCOPE_MISMATCH" }, \
- { NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \
- { NFS4CLNT_BIND_CONN_TO_SESSION, \
- "BIND_CONN_TO_SESSION" }, \
- { NFS4CLNT_MOVED, "MOVED" }, \
- { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \
- { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \
- { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \
- { NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \
- { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \
- { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" })
+ { BIT(NFS4CLNT_MANAGER_RUNNING), "MANAGER_RUNNING" }, \
+ { BIT(NFS4CLNT_CHECK_LEASE), "CHECK_LEASE" }, \
+ { BIT(NFS4CLNT_LEASE_EXPIRED), "LEASE_EXPIRED" }, \
+ { BIT(NFS4CLNT_RECLAIM_REBOOT), "RECLAIM_REBOOT" }, \
+ { BIT(NFS4CLNT_RECLAIM_NOGRACE), "RECLAIM_NOGRACE" }, \
+ { BIT(NFS4CLNT_DELEGRETURN), "DELEGRETURN" }, \
+ { BIT(NFS4CLNT_SESSION_RESET), "SESSION_RESET" }, \
+ { BIT(NFS4CLNT_LEASE_CONFIRM), "LEASE_CONFIRM" }, \
+ { BIT(NFS4CLNT_SERVER_SCOPE_MISMATCH), "SERVER_SCOPE_MISMATCH" }, \
+ { BIT(NFS4CLNT_PURGE_STATE), "PURGE_STATE" }, \
+ { BIT(NFS4CLNT_BIND_CONN_TO_SESSION), "BIND_CONN_TO_SESSION" }, \
+ { BIT(NFS4CLNT_MOVED), "MOVED" }, \
+ { BIT(NFS4CLNT_LEASE_MOVED), "LEASE_MOVED" }, \
+ { BIT(NFS4CLNT_DELEGATION_EXPIRED), "DELEGATION_EXPIRED" }, \
+ { BIT(NFS4CLNT_RUN_MANAGER), "RUN_MANAGER" }, \
+ { BIT(NFS4CLNT_MANAGER_AVAILABLE), "MANAGER_AVAILABLE" }, \
+ { BIT(NFS4CLNT_RECALL_RUNNING), "RECALL_RUNNING" }, \
+ { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_READ), "RECALL_ANY_LAYOUT_READ" }, \
+ { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_RW), "RECALL_ANY_LAYOUT_RW" }, \
+ { BIT(NFS4CLNT_DELEGRETURN_DELAYED), "DELERETURN_DELAYED" })
TRACE_EVENT(nfs4_state_mgr,
TP_PROTO(
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 642f6921852f..a778713343df 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -152,8 +152,6 @@ DEFINE_NFS_INODE_EVENT(nfs_getattr_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit);
DEFINE_NFS_INODE_EVENT(nfs_setattr_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit);
-DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter);
-DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit);
DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
@@ -933,13 +931,13 @@ TRACE_EVENT(nfs_sillyrename_unlink,
)
);
-TRACE_EVENT(nfs_aop_readpage,
+DECLARE_EVENT_CLASS(nfs_folio_event,
TP_PROTO(
const struct inode *inode,
- struct page *page
+ struct folio *folio
),
- TP_ARGS(inode, page),
+ TP_ARGS(inode, folio),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -947,6 +945,7 @@ TRACE_EVENT(nfs_aop_readpage,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
@@ -956,26 +955,36 @@ TRACE_EVENT(nfs_aop_readpage,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->offset = folio_file_pos(folio);
+ __entry->count = nfs_folio_length(folio);
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "offset=%lld count=%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
- __entry->offset
+ __entry->offset, __entry->count
)
);
-TRACE_EVENT(nfs_aop_readpage_done,
+#define DEFINE_NFS_FOLIO_EVENT(name) \
+ DEFINE_EVENT(nfs_folio_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ struct folio *folio \
+ ), \
+ TP_ARGS(inode, folio))
+
+DECLARE_EVENT_CLASS(nfs_folio_event_done,
TP_PROTO(
const struct inode *inode,
- struct page *page,
+ struct folio *folio,
int ret
),
- TP_ARGS(inode, page, ret),
+ TP_ARGS(inode, folio, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -984,6 +993,7 @@ TRACE_EVENT(nfs_aop_readpage_done,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
@@ -993,19 +1003,39 @@ TRACE_EVENT(nfs_aop_readpage_done,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->offset = folio_file_pos(folio);
+ __entry->count = nfs_folio_length(folio);
__entry->ret = ret;
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld ret=%d",
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "offset=%lld count=%u ret=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
- __entry->offset, __entry->ret
+ __entry->offset, __entry->count, __entry->ret
)
);
+#define DEFINE_NFS_FOLIO_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_folio_event_done, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ struct folio *folio, \
+ int ret \
+ ), \
+ TP_ARGS(inode, folio, ret))
+
+DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done);
+
+DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio);
+DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done);
+
TRACE_EVENT(nfs_aop_readahead,
TP_PROTO(
const struct inode *inode,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 16be6dae524f..64fa8de199de 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -21,6 +21,7 @@
#include <linux/nfs_page.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>
+#include <linux/filelock.h>
#include "internal.h"
#include "pnfs.h"
@@ -31,6 +32,42 @@
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
+struct nfs_page_iter_page {
+ const struct nfs_page *req;
+ size_t count;
+};
+
+static void nfs_page_iter_page_init(struct nfs_page_iter_page *i,
+ const struct nfs_page *req)
+{
+ i->req = req;
+ i->count = 0;
+}
+
+static void nfs_page_iter_page_advance(struct nfs_page_iter_page *i, size_t sz)
+{
+ const struct nfs_page *req = i->req;
+ size_t tmp = i->count + sz;
+
+ i->count = (tmp < req->wb_bytes) ? tmp : req->wb_bytes;
+}
+
+static struct page *nfs_page_iter_page_get(struct nfs_page_iter_page *i)
+{
+ const struct nfs_page *req = i->req;
+ struct page *page;
+
+ if (i->count != req->wb_bytes) {
+ size_t base = i->count + req->wb_pgbase;
+ size_t len = PAGE_SIZE - offset_in_page(base);
+
+ page = nfs_page_to_page(req, base);
+ nfs_page_iter_page_advance(i, len);
+ return page;
+ }
+ return NULL;
+}
+
static struct nfs_pgio_mirror *
nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
{
@@ -390,7 +427,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
* has extra ref from the write/commit path to handle handoff
* between write and commit lists. */
if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
- inode = page_file_mapping(req->wb_page)->host;
+ inode = nfs_page_to_inode(req);
set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
atomic_long_inc(&NFS_I(inode)->nrequests);
@@ -430,10 +467,9 @@ out:
nfs_release_request(head);
}
-static struct nfs_page *
-__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
- unsigned int pgbase, unsigned int offset,
- unsigned int count)
+static struct nfs_page *nfs_page_create(struct nfs_lock_context *l_ctx,
+ unsigned int pgbase, pgoff_t index,
+ unsigned int offset, unsigned int count)
{
struct nfs_page *req;
struct nfs_open_context *ctx = l_ctx->open_context;
@@ -452,42 +488,90 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
- req->wb_page = page;
- if (page) {
- req->wb_index = page_index(page);
- get_page(page);
- }
- req->wb_offset = offset;
- req->wb_pgbase = pgbase;
- req->wb_bytes = count;
+ req->wb_pgbase = pgbase;
+ req->wb_index = index;
+ req->wb_offset = offset;
+ req->wb_bytes = count;
kref_init(&req->wb_kref);
req->wb_nio = 0;
return req;
}
+static void nfs_page_assign_folio(struct nfs_page *req, struct folio *folio)
+{
+ if (folio != NULL) {
+ req->wb_folio = folio;
+ folio_get(folio);
+ set_bit(PG_FOLIO, &req->wb_flags);
+ }
+}
+
+static void nfs_page_assign_page(struct nfs_page *req, struct page *page)
+{
+ if (page != NULL) {
+ req->wb_page = page;
+ get_page(page);
+ }
+}
+
/**
- * nfs_create_request - Create an NFS read/write request.
+ * nfs_page_create_from_page - Create an NFS read/write request.
* @ctx: open context to use
* @page: page to write
- * @offset: starting offset within the page for the write
+ * @pgbase: starting offset within the page for the write
+ * @offset: file offset for the write
* @count: number of bytes to read/write
*
* The page must be locked by the caller. This makes sure we never
* create two different requests for the same page.
* User should ensure it is safe to sleep in this function.
*/
-struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct page *page,
- unsigned int offset, unsigned int count)
+struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx,
+ struct page *page,
+ unsigned int pgbase, loff_t offset,
+ unsigned int count)
+{
+ struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
+ struct nfs_page *ret;
+
+ if (IS_ERR(l_ctx))
+ return ERR_CAST(l_ctx);
+ ret = nfs_page_create(l_ctx, pgbase, offset >> PAGE_SHIFT,
+ offset_in_page(offset), count);
+ if (!IS_ERR(ret)) {
+ nfs_page_assign_page(ret, page);
+ nfs_page_group_init(ret, NULL);
+ }
+ nfs_put_lock_context(l_ctx);
+ return ret;
+}
+
+/**
+ * nfs_page_create_from_folio - Create an NFS read/write request.
+ * @ctx: open context to use
+ * @folio: folio to write
+ * @offset: starting offset within the folio for the write
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page.
+ * User should ensure it is safe to sleep in this function.
+ */
+struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx,
+ struct folio *folio,
+ unsigned int offset,
+ unsigned int count)
{
struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
struct nfs_page *ret;
if (IS_ERR(l_ctx))
return ERR_CAST(l_ctx);
- ret = __nfs_create_request(l_ctx, page, offset, offset, count);
- if (!IS_ERR(ret))
+ ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count);
+ if (!IS_ERR(ret)) {
+ nfs_page_assign_folio(ret, folio);
nfs_page_group_init(ret, NULL);
+ }
nfs_put_lock_context(l_ctx);
return ret;
}
@@ -500,10 +584,16 @@ nfs_create_subreq(struct nfs_page *req,
{
struct nfs_page *last;
struct nfs_page *ret;
+ struct folio *folio = nfs_page_to_folio(req);
+ struct page *page = nfs_page_to_page(req, pgbase);
- ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
- pgbase, offset, count);
+ ret = nfs_page_create(req->wb_lock_context, pgbase, req->wb_index,
+ offset, count);
if (!IS_ERR(ret)) {
+ if (folio)
+ nfs_page_assign_folio(ret, folio);
+ else
+ nfs_page_assign_page(ret, page);
/* find the last request */
for (last = req->wb_head;
last->wb_this_page != req->wb_head;
@@ -511,7 +601,6 @@ nfs_create_subreq(struct nfs_page *req,
;
nfs_lock_request(ret);
- ret->wb_index = req->wb_index;
nfs_page_group_init(ret, last);
ret->wb_nio = req->wb_nio;
}
@@ -550,11 +639,16 @@ void nfs_unlock_and_release_request(struct nfs_page *req)
*/
static void nfs_clear_request(struct nfs_page *req)
{
+ struct folio *folio = nfs_page_to_folio(req);
struct page *page = req->wb_page;
struct nfs_lock_context *l_ctx = req->wb_lock_context;
struct nfs_open_context *ctx;
- if (page != NULL) {
+ if (folio != NULL) {
+ folio_put(folio);
+ req->wb_folio = NULL;
+ clear_bit(PG_FOLIO, &req->wb_flags);
+ } else if (page != NULL) {
put_page(page);
req->wb_page = NULL;
}
@@ -692,13 +786,14 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
* @hdr: The pageio hdr
+ * @pgbase: base
* @count: Number of bytes to read
* @how: How to commit data (writes only)
* @cinfo: Commit information for the call (writes only)
*/
-static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
- unsigned int count,
- int how, struct nfs_commit_info *cinfo)
+static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int pgbase,
+ unsigned int count, int how,
+ struct nfs_commit_info *cinfo)
{
struct nfs_page *req = hdr->req;
@@ -709,7 +804,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
hdr->args.offset = req_offset(req);
/* pnfs_set_layoutcommit needs this */
hdr->mds_offset = hdr->args.offset;
- hdr->args.pgbase = req->wb_pgbase;
+ hdr->args.pgbase = pgbase;
hdr->args.pages = hdr->page_array.pagevec;
hdr->args.count = count;
hdr->args.context = get_nfs_open_context(nfs_req_openctx(req));
@@ -895,9 +990,10 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
struct nfs_commit_info cinfo;
struct nfs_page_array *pg_array = &hdr->page_array;
unsigned int pagecount, pageused;
+ unsigned int pg_base = offset_in_page(mirror->pg_base);
gfp_t gfp_flags = nfs_io_gfp_mask();
- pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
+ pagecount = nfs_page_array_len(pg_base, mirror->pg_count);
pg_array->npages = pagecount;
if (pagecount <= ARRAY_SIZE(pg_array->page_array))
@@ -917,16 +1013,26 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
last_page = NULL;
pageused = 0;
while (!list_empty(head)) {
+ struct nfs_page_iter_page i;
+ struct page *page;
+
req = nfs_list_entry(head->next);
nfs_list_move_request(req, &hdr->pages);
- if (!last_page || last_page != req->wb_page) {
- pageused++;
- if (pageused > pagecount)
- break;
- *pages++ = last_page = req->wb_page;
+ if (req->wb_pgbase == 0)
+ last_page = NULL;
+
+ nfs_page_iter_page_init(&i, req);
+ while ((page = nfs_page_iter_page_get(&i)) != NULL) {
+ if (last_page != page) {
+ pageused++;
+ if (pageused > pagecount)
+ goto full;
+ *pages++ = last_page = page;
+ }
}
}
+full:
if (WARN_ON_ONCE(pageused != pagecount)) {
nfs_pgio_error(hdr);
desc->pg_error = -EINVAL;
@@ -938,7 +1044,8 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- nfs_pgio_rpcsetup(hdr, mirror->pg_count, desc->pg_ioflags, &cinfo);
+ nfs_pgio_rpcsetup(hdr, pg_base, mirror->pg_count, desc->pg_ioflags,
+ &cinfo);
desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0;
}
@@ -1034,6 +1141,24 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
return l1->lockowner == l2->lockowner;
}
+static bool nfs_page_is_contiguous(const struct nfs_page *prev,
+ const struct nfs_page *req)
+{
+ size_t prev_end = prev->wb_pgbase + prev->wb_bytes;
+
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
+ if (req->wb_pgbase == 0)
+ return prev_end == nfs_page_max_length(prev);
+ if (req->wb_pgbase == prev_end) {
+ struct folio *folio = nfs_page_to_folio(req);
+ if (folio)
+ return folio == nfs_page_to_folio(prev);
+ return req->wb_page == prev->wb_page;
+ }
+ return false;
+}
+
/**
* nfs_coalesce_size - test two requests for compatibility
* @prev: pointer to nfs_page
@@ -1062,16 +1187,8 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev,
!nfs_match_lock_context(req->wb_lock_context,
prev->wb_lock_context))
return 0;
- if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ if (!nfs_page_is_contiguous(prev, req))
return 0;
- if (req->wb_page == prev->wb_page) {
- if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
- return 0;
- } else {
- if (req->wb_pgbase != 0 ||
- prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE)
- return 0;
- }
}
return pgio->pg_ops->pg_test(pgio, prev, req);
}
@@ -1411,16 +1528,21 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
struct nfs_pgio_mirror *mirror;
struct nfs_page *prev;
+ struct folio *folio;
u32 midx;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
mirror = nfs_pgio_get_mirror(desc, midx);
if (!list_empty(&mirror->pg_list)) {
prev = nfs_list_entry(mirror->pg_list.prev);
- if (index != prev->wb_index + 1) {
- nfs_pageio_complete(desc);
- break;
- }
+ folio = nfs_page_to_folio(prev);
+ if (folio) {
+ if (index == folio_next_index(folio))
+ continue;
+ } else if (index == prev->wb_index + 1)
+ continue;
+ nfs_pageio_complete(desc);
+ break;
}
}
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a5db5158c634..306cba0b9e69 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -511,7 +511,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
spin_lock(&inode->i_lock);
pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
- pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e3e6a41f19de..d886c8226d8f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -193,7 +193,7 @@ struct pnfs_commit_ops {
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
- struct page *page);
+ struct folio *folio);
};
struct pnfs_layout_hdr {
@@ -395,7 +395,7 @@ void pnfs_generic_rw_release(void *data);
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo);
struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
- struct page *page);
+ struct folio *folio);
int pnfs_generic_commit_pagelist(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -557,13 +557,13 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct page *page)
+ struct folio *folio)
{
struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs)
return NULL;
- return fl_cinfo->ops->search_commit_reqs(cinfo, page);
+ return fl_cinfo->ops->search_commit_reqs(cinfo, folio);
}
/* Should the pNFS client commit and return the layout upon a setattr */
@@ -864,7 +864,7 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo)
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
- struct page *page)
+ struct folio *folio)
{
return NULL;
}
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 5d035dd2d7bf..a0112ad4937a 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
static struct nfs_page *
pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
- unsigned int nbuckets, struct page *page)
+ unsigned int nbuckets, struct folio *folio)
{
struct nfs_page *req;
struct pnfs_commit_bucket *b;
@@ -363,11 +363,11 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
* request is found */
for (i = 0, b = buckets; i < nbuckets; i++, b++) {
list_for_each_entry(req, &b->written, wb_list) {
- if (req->wb_page == page)
+ if (nfs_page_to_folio(req) == folio)
return req->wb_head;
}
list_for_each_entry(req, &b->committing, wb_list) {
- if (req->wb_page == page)
+ if (nfs_page_to_folio(req) == folio)
return req->wb_head;
}
}
@@ -375,14 +375,14 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
}
/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
- * for @page
+ * for @folio
* @cinfo - commit info for current inode
- * @page - page to search for matching head request
+ * @folio - page to search for matching head request
*
* Return: the head request if one is found, otherwise %NULL.
*/
-struct nfs_page *
-pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
+struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo,
+ struct folio *folio)
{
struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
struct pnfs_commit_array *array;
@@ -390,7 +390,7 @@ pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page
list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
req = pnfs_bucket_search_commit_reqs(array->buckets,
- array->nbuckets, page);
+ array->nbuckets, folio);
if (req)
return req;
}
@@ -1180,7 +1180,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
nfs_request_add_commit_list_locked(req, list, cinfo);
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- nfs_mark_page_unstable(req->wb_page, cinfo);
+ nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
return;
out_resched:
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8ae2c8d1219d..c380cff4108e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -49,12 +49,11 @@ static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
-static
-int nfs_return_empty_page(struct page *page)
+static int nfs_return_empty_folio(struct folio *folio)
{
- zero_user(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- unlock_page(page);
+ folio_zero_segment(folio, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
return 0;
}
@@ -111,18 +110,18 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
static void nfs_readpage_release(struct nfs_page *req, int error)
{
struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
- struct page *page = req->wb_page;
+ struct folio *folio = nfs_page_to_folio(req);
dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
(long long)req_offset(req));
if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
- SetPageError(page);
+ folio_set_error(folio);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
- if (PageUptodate(page))
- nfs_fscache_write_page(inode, page);
- unlock_page(page);
+ if (folio_test_uptodate(folio))
+ nfs_fscache_write_page(inode, &folio->page);
+ folio_unlock(folio);
}
nfs_release_request(req);
}
@@ -135,7 +134,7 @@ struct nfs_readdesc {
static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
- SetPageUptodate(req->wb_page);
+ folio_mark_uptodate(nfs_page_to_folio(req));
}
static void nfs_read_completion(struct nfs_pgio_header *hdr)
@@ -147,7 +146,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
goto out;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
- struct page *page = req->wb_page;
+ struct folio *folio = nfs_page_to_folio(req);
unsigned long start = req->wb_pgbase;
unsigned long end = req->wb_pgbase + req->wb_bytes;
@@ -157,14 +156,14 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
if (bytes > hdr->good_bytes) {
/* nothing in this request was good, so zero
* the full extent of the request */
- zero_user_segment(page, start, end);
+ folio_zero_segment(folio, start, end);
} else if (hdr->good_bytes - bytes < req->wb_bytes) {
/* part of this request has good bytes, but
* not all. zero the bad bytes */
start += hdr->good_bytes - bytes;
WARN_ON(start < req->wb_pgbase);
- zero_user_segment(page, start, end);
+ folio_zero_segment(folio, start, end);
}
}
error = 0;
@@ -281,33 +280,34 @@ static void nfs_readpage_result(struct rpc_task *task,
nfs_readpage_retry(task, hdr);
}
-static int
-readpage_async_filler(struct nfs_readdesc *desc, struct page *page)
+static int readpage_async_filler(struct nfs_readdesc *desc, struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
+ struct inode *inode = folio_file_mapping(folio)->host;
+ struct nfs_server *server = NFS_SERVER(inode);
+ size_t fsize = folio_size(folio);
+ unsigned int rsize = server->rsize;
struct nfs_page *new;
unsigned int len, aligned_len;
int error;
- len = nfs_page_length(page);
+ len = nfs_folio_length(folio);
if (len == 0)
- return nfs_return_empty_page(page);
+ return nfs_return_empty_folio(folio);
- aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);
+ aligned_len = min_t(unsigned int, ALIGN(len, rsize), fsize);
- if (!IS_SYNC(page->mapping->host)) {
- error = nfs_fscache_read_page(page->mapping->host, page);
+ if (!IS_SYNC(inode)) {
+ error = nfs_fscache_read_page(inode, &folio->page);
if (error == 0)
goto out_unlock;
}
- new = nfs_create_request(desc->ctx, page, 0, aligned_len);
+ new = nfs_page_create_from_folio(desc->ctx, folio, 0, aligned_len);
if (IS_ERR(new))
goto out_error;
- if (len < PAGE_SIZE)
- zero_user_segment(page, len, PAGE_SIZE);
+ if (len < fsize)
+ folio_zero_segment(folio, len, fsize);
if (!nfs_pageio_add_request(&desc->pgio, new)) {
nfs_list_remove_request(new);
error = desc->pgio.pg_error;
@@ -318,7 +318,7 @@ readpage_async_filler(struct nfs_readdesc *desc, struct page *page)
out_error:
error = PTR_ERR(new);
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
out:
return error;
}
@@ -331,61 +331,54 @@ out:
*/
int nfs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
struct nfs_readdesc desc;
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = file_inode(file);
int ret;
- trace_nfs_aop_readpage(inode, page);
+ trace_nfs_aop_readpage(inode, folio);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
/*
* Try to flush any pending writes to the file..
*
- * NOTE! Because we own the page lock, there cannot
+ * NOTE! Because we own the folio lock, there cannot
* be any new pending writes generated at this point
- * for this page (other pages can be written to).
+ * for this folio (other folios can be written to).
*/
- ret = nfs_wb_page(inode, page);
+ ret = nfs_wb_folio(inode, folio);
if (ret)
goto out_unlock;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
goto out_unlock;
ret = -ESTALE;
if (NFS_STALE(inode))
goto out_unlock;
- if (file == NULL) {
- ret = -EBADF;
- desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
- if (desc.ctx == NULL)
- goto out_unlock;
- } else
- desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
+ desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
xchg(&desc.ctx->error, 0);
nfs_pageio_init_read(&desc.pgio, inode, false,
&nfs_async_read_completion_ops);
- ret = readpage_async_filler(&desc, page);
+ ret = readpage_async_filler(&desc, folio);
if (ret)
goto out;
nfs_pageio_complete_read(&desc.pgio);
ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
if (!ret) {
- ret = wait_on_page_locked_killable(page);
- if (!PageUptodate(page) && !ret)
+ ret = folio_wait_locked_killable(folio);
+ if (!folio_test_uptodate(folio) && !ret)
ret = xchg(&desc.ctx->error, 0);
}
out:
put_nfs_open_context(desc.ctx);
- trace_nfs_aop_readpage_done(inode, page, ret);
+ trace_nfs_aop_readpage_done(inode, folio, ret);
return ret;
out_unlock:
- unlock_page(page);
- trace_nfs_aop_readpage_done(inode, page, ret);
+ folio_unlock(folio);
+ trace_nfs_aop_readpage_done(inode, folio, ret);
return ret;
}
@@ -395,7 +388,7 @@ void nfs_readahead(struct readahead_control *ractl)
struct file *file = ractl->file;
struct nfs_readdesc desc;
struct inode *inode = ractl->mapping->host;
- struct page *page;
+ struct folio *folio;
int ret;
trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
@@ -416,9 +409,8 @@ void nfs_readahead(struct readahead_control *ractl)
nfs_pageio_init_read(&desc.pgio, inode, false,
&nfs_async_read_completion_ops);
- while ((page = readahead_page(ractl)) != NULL) {
- ret = readpage_async_filler(&desc, page);
- put_page(page);
+ while ((folio = readahead_folio(ractl)) != NULL) {
+ ret = readpage_async_filler(&desc, folio);
if (ret)
break;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80c240e50952..f4cca8f00c0c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -25,6 +25,7 @@
#include <linux/freezer.h>
#include <linux/wait.h>
#include <linux/iversion.h>
+#include <linux/filelock.h>
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
@@ -63,7 +64,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct page *page);
+ struct folio *folio);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -170,31 +171,28 @@ nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
return 0;
}
-static struct nfs_page *
-nfs_page_private_request(struct page *page)
+static struct nfs_page *nfs_folio_private_request(struct folio *folio)
{
- if (!PagePrivate(page))
- return NULL;
- return (struct nfs_page *)page_private(page);
+ return folio_get_private(folio);
}
-/*
- * nfs_page_find_head_request_locked - find head request associated with @page
+/**
+ * nfs_folio_find_private_request - find head request associated with a folio
+ * @folio: pointer to folio
*
* must be called while holding the inode lock.
*
* returns matching head request with reference held, or NULL if not found.
*/
-static struct nfs_page *
-nfs_page_find_private_request(struct page *page)
+static struct nfs_page *nfs_folio_find_private_request(struct folio *folio)
{
- struct address_space *mapping = page_file_mapping(page);
+ struct address_space *mapping = folio_file_mapping(folio);
struct nfs_page *req;
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
return NULL;
spin_lock(&mapping->private_lock);
- req = nfs_page_private_request(page);
+ req = nfs_folio_private_request(folio);
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
@@ -203,18 +201,17 @@ nfs_page_find_private_request(struct page *page)
return req;
}
-static struct nfs_page *
-nfs_page_find_swap_request(struct page *page)
+static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req = NULL;
- if (!PageSwapCache(page))
+ if (!folio_test_swapcache(folio))
return NULL;
mutex_lock(&nfsi->commit_mutex);
- if (PageSwapCache(page)) {
+ if (folio_test_swapcache(folio)) {
req = nfs_page_search_commits_for_head_request_locked(nfsi,
- page);
+ folio);
if (req) {
WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref);
@@ -224,29 +221,30 @@ nfs_page_find_swap_request(struct page *page)
return req;
}
-/*
- * nfs_page_find_head_request - find head request associated with @page
+/**
+ * nfs_folio_find_head_request - find head request associated with a folio
+ * @folio: pointer to folio
*
* returns matching head request with reference held, or NULL if not found.
*/
-static struct nfs_page *nfs_page_find_head_request(struct page *page)
+static struct nfs_page *nfs_folio_find_head_request(struct folio *folio)
{
struct nfs_page *req;
- req = nfs_page_find_private_request(page);
+ req = nfs_folio_find_private_request(folio);
if (!req)
- req = nfs_page_find_swap_request(page);
+ req = nfs_folio_find_swap_request(folio);
return req;
}
-static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
+static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_page *req, *head;
int ret;
for (;;) {
- req = nfs_page_find_head_request(page);
+ req = nfs_folio_find_head_request(folio);
if (!req)
return req;
head = nfs_page_group_lock_head(req);
@@ -260,9 +258,9 @@ static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
return ERR_PTR(ret);
}
/* Ensure that nobody removed the request before we locked it */
- if (head == nfs_page_private_request(page))
+ if (head == nfs_folio_private_request(folio))
break;
- if (PageSwapCache(page))
+ if (folio_test_swapcache(folio))
break;
nfs_unlock_and_release_request(head);
}
@@ -270,18 +268,19 @@ static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
}
/* Adjust the file length if we're writing beyond the end */
-static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
+static void nfs_grow_file(struct folio *folio, unsigned int offset,
+ unsigned int count)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
loff_t end, i_size;
pgoff_t end_index;
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
- end_index = (i_size - 1) >> PAGE_SHIFT;
- if (i_size > 0 && page_index(page) < end_index)
+ end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio);
+ if (i_size > 0 && folio_index(folio) < end_index)
goto out;
- end = page_file_offset(page) + ((loff_t)offset+count);
+ end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count;
if (i_size >= end)
goto out;
trace_nfs_size_grow(inode, end);
@@ -307,11 +306,11 @@ static void nfs_set_pageerror(struct address_space *mapping)
spin_unlock(&inode->i_lock);
}
-static void nfs_mapping_set_error(struct page *page, int error)
+static void nfs_mapping_set_error(struct folio *folio, int error)
{
- struct address_space *mapping = page_file_mapping(page);
+ struct address_space *mapping = folio_file_mapping(folio);
- SetPageError(page);
+ folio_set_error(folio);
filemap_set_wb_err(mapping, error);
if (mapping->host)
errseq_set(&mapping->host->i_sb->s_wb_err,
@@ -358,9 +357,9 @@ nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
*/
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
+ unsigned int len = nfs_folio_length(nfs_page_to_folio(req));
struct nfs_page *tmp;
unsigned int pos = 0;
- unsigned int len = nfs_page_length(req->wb_page);
nfs_page_group_lock(req);
@@ -380,11 +379,13 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
*/
static void nfs_mark_uptodate(struct nfs_page *req)
{
- if (PageUptodate(req->wb_page))
+ struct folio *folio = nfs_page_to_folio(req);
+
+ if (folio_test_uptodate(folio))
return;
if (!nfs_page_group_covers_page(req))
return;
- SetPageUptodate(req->wb_page);
+ folio_mark_uptodate(folio);
}
static int wb_priority(struct writeback_control *wbc)
@@ -406,35 +407,34 @@ int nfs_congestion_kb;
#define NFS_CONGESTION_OFF_THRESH \
(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
-static void nfs_set_page_writeback(struct page *page)
+static void nfs_folio_set_writeback(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_server *nfss = NFS_SERVER(inode);
- int ret = test_set_page_writeback(page);
-
- WARN_ON_ONCE(ret != 0);
+ struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
- if (atomic_long_inc_return(&nfss->writeback) >
- NFS_CONGESTION_ON_THRESH)
+ folio_start_writeback(folio);
+ if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH)
nfss->write_congested = 1;
}
-static void nfs_end_page_writeback(struct nfs_page *req)
+static void nfs_folio_end_writeback(struct folio *folio)
{
- struct inode *inode = page_file_mapping(req->wb_page)->host;
- struct nfs_server *nfss = NFS_SERVER(inode);
- bool is_done;
+ struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host);
- is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
- nfs_unlock_request(req);
- if (!is_done)
- return;
-
- end_page_writeback(req->wb_page);
- if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
+ folio_end_writeback(folio);
+ if (atomic_long_dec_return(&nfss->writeback) <
+ NFS_CONGESTION_OFF_THRESH)
nfss->write_congested = 0;
}
+static void nfs_page_end_writeback(struct nfs_page *req)
+{
+ if (nfs_page_group_sync_on_bit(req, PG_WB_END)) {
+ nfs_unlock_request(req);
+ nfs_folio_end_writeback(nfs_page_to_folio(req));
+ } else
+ nfs_unlock_request(req);
+}
+
/*
* nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
*
@@ -549,7 +549,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
/*
* nfs_lock_and_join_requests - join all subreqs to the head req
- * @page: the page used to lookup the "page group" of nfs_page structures
+ * @folio: the folio used to lookup the "page group" of nfs_page structures
*
* This function joins all sub requests to the head request by first
* locking all requests in the group, cancelling any pending operations
@@ -559,13 +559,12 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
*
* Returns a locked, referenced pointer to the head request - which after
* this call is guaranteed to be the only request associated with the page.
- * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * Returns NULL if no requests are found for @folio, or a ERR_PTR if an
* error was encountered.
*/
-static struct nfs_page *
-nfs_lock_and_join_requests(struct page *page)
+static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_page *head;
int ret;
@@ -574,7 +573,7 @@ nfs_lock_and_join_requests(struct page *page)
* reference to the whole page group - the group will not be destroyed
* until the head reference is released.
*/
- head = nfs_find_and_lock_page_request(page);
+ head = nfs_folio_find_and_lock_request(folio);
if (IS_ERR_OR_NULL(head))
return head;
@@ -592,11 +591,10 @@ nfs_lock_and_join_requests(struct page *page)
static void nfs_write_error(struct nfs_page *req, int error)
{
- trace_nfs_write_error(page_file_mapping(req->wb_page)->host, req,
- error);
- nfs_mapping_set_error(req->wb_page, error);
+ trace_nfs_write_error(nfs_page_to_inode(req), req, error);
+ nfs_mapping_set_error(nfs_page_to_folio(req), error);
nfs_inode_remove_request(req);
- nfs_end_page_writeback(req);
+ nfs_page_end_writeback(req);
nfs_release_request(req);
}
@@ -604,21 +602,21 @@ static void nfs_write_error(struct nfs_page *req, int error)
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
*/
-static int nfs_page_async_flush(struct page *page,
+static int nfs_page_async_flush(struct folio *folio,
struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio)
{
struct nfs_page *req;
int ret = 0;
- req = nfs_lock_and_join_requests(page);
+ req = nfs_lock_and_join_requests(folio);
if (!req)
goto out;
ret = PTR_ERR(req);
if (IS_ERR(req))
goto out;
- nfs_set_page_writeback(page);
+ nfs_folio_set_writeback(folio);
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
/* If there is a fatal error that covers this write, just exit */
@@ -636,12 +634,12 @@ static int nfs_page_async_flush(struct page *page,
goto out_launder;
if (wbc->sync_mode == WB_SYNC_NONE)
ret = AOP_WRITEPAGE_ACTIVATE;
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
nfs_redirty_request(req);
pgio->pg_error = 0;
} else
- nfs_add_stats(page_file_mapping(page)->host,
- NFSIOS_WRITEPAGES, 1);
+ nfs_add_stats(folio_file_mapping(folio)->host,
+ NFSIOS_WRITEPAGES, 1);
out:
return ret;
out_launder:
@@ -649,21 +647,21 @@ out_launder:
return 0;
}
-static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
+static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio)
{
- nfs_pageio_cond_complete(pgio, page_index(page));
- return nfs_page_async_flush(page, wbc, pgio);
+ nfs_pageio_cond_complete(pgio, folio_index(folio));
+ return nfs_page_async_flush(folio, wbc, pgio);
}
/*
* Write an mmapped page to the server.
*/
-static int nfs_writepage_locked(struct page *page,
+static int nfs_writepage_locked(struct folio *folio,
struct writeback_control *wbc)
{
struct nfs_pageio_descriptor pgio;
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
int err;
if (wbc->sync_mode == WB_SYNC_NONE &&
@@ -671,9 +669,9 @@ static int nfs_writepage_locked(struct page *page,
return AOP_WRITEPAGE_ACTIVATE;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
- nfs_pageio_init_write(&pgio, inode, 0,
- false, &nfs_async_write_completion_ops);
- err = nfs_do_writepage(page, wbc, &pgio);
+ nfs_pageio_init_write(&pgio, inode, 0, false,
+ &nfs_async_write_completion_ops);
+ err = nfs_do_writepage(folio, wbc, &pgio);
pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
return err;
@@ -681,21 +679,23 @@ static int nfs_writepage_locked(struct page *page,
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
int ret;
- ret = nfs_writepage_locked(page, wbc);
+ ret = nfs_writepage_locked(folio, wbc);
if (ret != AOP_WRITEPAGE_ACTIVATE)
unlock_page(page);
return ret;
}
-static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
+static int nfs_writepages_callback(struct folio *folio,
+ struct writeback_control *wbc, void *data)
{
int ret;
- ret = nfs_do_writepage(page, wbc, data);
+ ret = nfs_do_writepage(folio, wbc, data);
if (ret != AOP_WRITEPAGE_ACTIVATE)
- unlock_page(page);
+ folio_unlock(folio);
return ret;
}
@@ -749,10 +749,11 @@ out_err:
/*
* Insert a write request into an inode
*/
-static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static void nfs_inode_add_request(struct nfs_page *req)
{
- struct address_space *mapping = page_file_mapping(req->wb_page);
- struct nfs_inode *nfsi = NFS_I(inode);
+ struct folio *folio = nfs_page_to_folio(req);
+ struct address_space *mapping = folio_file_mapping(folio);
+ struct nfs_inode *nfsi = NFS_I(mapping->host);
WARN_ON_ONCE(req->wb_this_page != req);
@@ -764,10 +765,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
* with invalidate/truncate.
*/
spin_lock(&mapping->private_lock);
- if (likely(!PageSwapCache(req->wb_page))) {
+ if (likely(!folio_test_swapcache(folio))) {
set_bit(PG_MAPPED, &req->wb_flags);
- SetPagePrivate(req->wb_page);
- set_page_private(req->wb_page, (unsigned long)req);
+ folio_set_private(folio);
+ folio->private = req;
}
spin_unlock(&mapping->private_lock);
atomic_long_inc(&nfsi->nrequests);
@@ -784,47 +785,43 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
*/
static void nfs_inode_remove_request(struct nfs_page *req)
{
- struct address_space *mapping = page_file_mapping(req->wb_page);
- struct inode *inode = mapping->host;
- struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_page *head;
-
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
- head = req->wb_head;
+ struct folio *folio = nfs_page_to_folio(req->wb_head);
+ struct address_space *mapping = folio_file_mapping(folio);
spin_lock(&mapping->private_lock);
- if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
- set_page_private(head->wb_page, 0);
- ClearPagePrivate(head->wb_page);
- clear_bit(PG_MAPPED, &head->wb_flags);
+ if (likely(folio && !folio_test_swapcache(folio))) {
+ folio->private = NULL;
+ folio_clear_private(folio);
+ clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
}
spin_unlock(&mapping->private_lock);
}
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
nfs_release_request(req);
- atomic_long_dec(&nfsi->nrequests);
+ atomic_long_dec(&NFS_I(nfs_page_to_inode(req))->nrequests);
}
}
-static void
-nfs_mark_request_dirty(struct nfs_page *req)
+static void nfs_mark_request_dirty(struct nfs_page *req)
{
- if (req->wb_page)
- __set_page_dirty_nobuffers(req->wb_page);
+ struct folio *folio = nfs_page_to_folio(req);
+ if (folio)
+ filemap_dirty_folio(folio_mapping(folio), folio);
}
/*
* nfs_page_search_commits_for_head_request_locked
*
- * Search through commit lists on @inode for the head request for @page.
+ * Search through commit lists on @inode for the head request for @folio.
* Must be called while holding the inode (which is cinfo) lock.
*
* Returns the head request if found, or NULL if not found.
*/
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
- struct page *page)
+ struct folio *folio)
{
struct nfs_page *freq, *t;
struct nfs_commit_info cinfo;
@@ -833,13 +830,13 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
nfs_init_cinfo_from_inode(&cinfo, inode);
/* search through pnfs commit lists */
- freq = pnfs_search_commit_reqs(inode, &cinfo, page);
+ freq = pnfs_search_commit_reqs(inode, &cinfo, folio);
if (freq)
return freq->wb_head;
/* Linearly search the commit list for the correct request */
list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
- if (freq->wb_page == page)
+ if (nfs_page_to_folio(freq) == folio)
return freq->wb_head;
}
@@ -887,8 +884,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- if (req->wb_page)
- nfs_mark_page_unstable(req->wb_page, cinfo);
+ nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
@@ -947,12 +943,15 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
nfs_request_add_commit_list(req, cinfo);
}
-static void
-nfs_clear_page_commit(struct page *page)
+static void nfs_folio_clear_commit(struct folio *folio)
{
- dec_node_page_state(page, NR_WRITEBACK);
- dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
- WB_WRITEBACK);
+ if (folio) {
+ long nr = folio_nr_pages(folio);
+
+ node_stat_mod_folio(folio, NR_WRITEBACK, -nr);
+ wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb,
+ WB_WRITEBACK, -nr);
+ }
}
/* Called holding the request lock on @req */
@@ -970,7 +969,7 @@ nfs_clear_request_commit(struct nfs_page *req)
nfs_request_remove_commit_list(req, &cinfo);
}
mutex_unlock(&NFS_I(inode)->commit_mutex);
- nfs_clear_page_commit(req->wb_page);
+ nfs_folio_clear_commit(nfs_page_to_folio(req));
}
}
@@ -1002,7 +1001,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
trace_nfs_comp_error(hdr->inode, req, hdr->error);
- nfs_mapping_set_error(req->wb_page, hdr->error);
+ nfs_mapping_set_error(nfs_page_to_folio(req),
+ hdr->error);
goto remove_req;
}
if (nfs_write_need_commit(hdr)) {
@@ -1016,7 +1016,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
remove_req:
nfs_inode_remove_request(req);
next:
- nfs_end_page_writeback(req);
+ nfs_page_end_writeback(req);
nfs_release_request(req);
}
out:
@@ -1092,10 +1092,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
* If the attempt fails, then the existing request is flushed out
* to disk.
*/
-static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
- struct page *page,
- unsigned int offset,
- unsigned int bytes)
+static struct nfs_page *nfs_try_to_update_request(struct folio *folio,
+ unsigned int offset,
+ unsigned int bytes)
{
struct nfs_page *req;
unsigned int rqend;
@@ -1104,7 +1103,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
end = offset + bytes;
- req = nfs_lock_and_join_requests(page);
+ req = nfs_lock_and_join_requests(folio);
if (IS_ERR_OR_NULL(req))
return req;
@@ -1137,7 +1136,7 @@ out_flushme:
*/
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
- error = nfs_wb_page(inode, page);
+ error = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
return (error < 0) ? ERR_PTR(error) : NULL;
}
@@ -1148,40 +1147,42 @@ out_flushme:
* if we have to add a new request. Also assumes that the caller has
* already called nfs_flush_incompatible() if necessary.
*/
-static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
- struct page *page, unsigned int offset, unsigned int bytes)
+static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
+ struct folio *folio,
+ unsigned int offset,
+ unsigned int bytes)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *req;
- req = nfs_try_to_update_request(inode, page, offset, bytes);
+ req = nfs_try_to_update_request(folio, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, page, offset, bytes);
+ req = nfs_page_create_from_folio(ctx, folio, offset, bytes);
if (IS_ERR(req))
goto out;
- nfs_inode_add_request(inode, req);
+ nfs_inode_add_request(req);
out:
return req;
}
-static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
- unsigned int offset, unsigned int count)
+static int nfs_writepage_setup(struct nfs_open_context *ctx,
+ struct folio *folio, unsigned int offset,
+ unsigned int count)
{
- struct nfs_page *req;
+ struct nfs_page *req;
- req = nfs_setup_write_request(ctx, page, offset, count);
+ req = nfs_setup_write_request(ctx, folio, offset, count);
if (IS_ERR(req))
return PTR_ERR(req);
/* Update file length */
- nfs_grow_file(page, offset, count);
+ nfs_grow_file(folio, offset, count);
nfs_mark_uptodate(req);
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
return 0;
}
-int nfs_flush_incompatible(struct file *file, struct page *page)
+int nfs_flush_incompatible(struct file *file, struct folio *folio)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_lock_context *l_ctx;
@@ -1197,12 +1198,12 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_head_request(page);
+ req = nfs_folio_find_head_request(folio);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
- do_flush = req->wb_page != page ||
- !nfs_match_open_context(nfs_req_openctx(req), ctx);
+ do_flush = nfs_page_to_folio(req) != folio ||
+ !nfs_match_open_context(nfs_req_openctx(req), ctx);
if (l_ctx && flctx &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock))) {
@@ -1211,7 +1212,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
nfs_release_request(req);
if (!do_flush)
return 0;
- status = nfs_wb_page(page_file_mapping(page)->host, page);
+ status = nfs_wb_folio(folio_file_mapping(folio)->host, folio);
} while (status == 0);
return status;
}
@@ -1283,9 +1284,9 @@ out:
* the PageUptodate() flag. In this case, we will need to turn off
* write optimisations that depend on the page contents being correct.
*/
-static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
- unsigned int pagelen)
+static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen)
{
+ struct inode *inode = folio_file_mapping(folio)->host;
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
@@ -1299,7 +1300,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
out:
if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
return false;
- return PageUptodate(page) != 0;
+ return folio_test_uptodate(folio) != 0;
}
static bool
@@ -1317,16 +1318,17 @@ is_whole_file_wrlock(struct file_lock *fl)
* If the file is opened for synchronous writes then we can just skip the rest
* of the checks.
*/
-static int nfs_can_extend_write(struct file *file, struct page *page,
- struct inode *inode, unsigned int pagelen)
+static int nfs_can_extend_write(struct file *file, struct folio *folio,
+ unsigned int pagelen)
{
- int ret;
+ struct inode *inode = file_inode(file);
struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
+ int ret;
if (file->f_flags & O_DSYNC)
return 0;
- if (!nfs_write_pageuptodate(page, inode, pagelen))
+ if (!nfs_folio_write_uptodate(folio, pagelen))
return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1;
@@ -1358,33 +1360,33 @@ static int nfs_can_extend_write(struct file *file, struct page *page,
* XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
* things with a page scheduled for an RPC call (e.g. invalidate it).
*/
-int nfs_updatepage(struct file *file, struct page *page,
- unsigned int offset, unsigned int count)
+int nfs_update_folio(struct file *file, struct folio *folio,
+ unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct address_space *mapping = page_file_mapping(page);
- struct inode *inode = mapping->host;
- unsigned int pagelen = nfs_page_length(page);
+ struct address_space *mapping = folio_file_mapping(folio);
+ struct inode *inode = mapping->host;
+ unsigned int pagelen = nfs_folio_length(folio);
int status = 0;
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
- dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
- file, count, (long long)(page_file_offset(page) + offset));
+ dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count,
+ (long long)(folio_file_pos(folio) + offset));
if (!count)
goto out;
- if (nfs_can_extend_write(file, page, inode, pagelen)) {
+ if (nfs_can_extend_write(file, folio, pagelen)) {
count = max(count + offset, pagelen);
offset = 0;
}
- status = nfs_writepage_setup(ctx, page, offset, count);
+ status = nfs_writepage_setup(ctx, folio, offset, count);
if (status < 0)
nfs_set_pageerror(mapping);
out:
- dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
+ dprintk("NFS: nfs_update_folio returns %d (isize %lld)\n",
status, (long long)i_size_read(inode));
return status;
}
@@ -1420,13 +1422,13 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
- struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+ struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
atomic_long_inc(&nfsi->redirtied_pages);
- nfs_end_page_writeback(req);
+ nfs_page_end_writeback(req);
nfs_release_request(req);
}
@@ -1784,18 +1786,18 @@ void nfs_retry_commit(struct list_head *page_list,
req = nfs_list_entry(page_list->next);
nfs_list_remove_request(req);
nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
- if (!cinfo->dreq)
- nfs_clear_page_commit(req->wb_page);
+ nfs_folio_clear_commit(nfs_page_to_folio(req));
nfs_unlock_and_release_request(req);
}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);
-static void
-nfs_commit_resched_write(struct nfs_commit_info *cinfo,
- struct nfs_page *req)
+static void nfs_commit_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
{
- __set_page_dirty_nobuffers(req->wb_page);
+ struct folio *folio = nfs_page_to_folio(req);
+
+ filemap_dirty_folio(folio_mapping(folio), folio);
}
/*
@@ -1846,12 +1848,13 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
int status = data->task.tk_status;
struct nfs_commit_info cinfo;
struct nfs_server *nfss;
+ struct folio *folio;
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- if (req->wb_page)
- nfs_clear_page_commit(req->wb_page);
+ folio = nfs_page_to_folio(req);
+ nfs_folio_clear_commit(folio);
dprintk("NFS: commit (%s/%llu %d@%lld)",
nfs_req_openctx(req)->dentry->d_sb->s_id,
@@ -1859,10 +1862,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
- if (req->wb_page) {
+ if (folio) {
trace_nfs_commit_error(data->inode, req,
status);
- nfs_mapping_set_error(req->wb_page, status);
+ nfs_mapping_set_error(folio, status);
nfs_inode_remove_request(req);
}
dprintk_cont(", error = %d\n", status);
@@ -1873,7 +1876,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
* returned by the server against all stored verfs. */
if (nfs_write_match_verf(verf, req)) {
/* We have a match */
- if (req->wb_page)
+ if (folio)
nfs_inode_remove_request(req);
dprintk_cont(" OK\n");
goto next;
@@ -2054,7 +2057,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
/* blocking call to cancel all requests and join to a single (head)
* request */
- req = nfs_lock_and_join_requests(&folio->page);
+ req = nfs_lock_and_join_requests(folio);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
@@ -2070,13 +2073,18 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
return ret;
}
-/*
- * Write back all requests on one page - we do this before reading it.
+/**
+ * nfs_wb_folio - Write back all requests on one page
+ * @inode: pointer to page
+ * @folio: pointer to folio
+ *
+ * Assumes that the folio has been locked by the caller, and will
+ * not unlock it.
*/
-int nfs_wb_page(struct inode *inode, struct page *page)
+int nfs_wb_folio(struct inode *inode, struct folio *folio)
{
- loff_t range_start = page_file_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
+ loff_t range_start = folio_file_pos(folio);
+ loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
@@ -2085,25 +2093,25 @@ int nfs_wb_page(struct inode *inode, struct page *page)
};
int ret;
- trace_nfs_writeback_page_enter(inode);
+ trace_nfs_writeback_folio(inode, folio);
for (;;) {
- wait_on_page_writeback(page);
- if (clear_page_dirty_for_io(page)) {
- ret = nfs_writepage_locked(page, &wbc);
+ folio_wait_writeback(folio);
+ if (folio_clear_dirty_for_io(folio)) {
+ ret = nfs_writepage_locked(folio, &wbc);
if (ret < 0)
goto out_error;
continue;
}
ret = 0;
- if (!PagePrivate(page))
+ if (!folio_test_private(folio))
break;
ret = nfs_commit_inode(inode, FLUSH_SYNC);
if (ret < 0)
goto out_error;
}
out_error:
- trace_nfs_writeback_page_exit(inode, ret);
+ trace_nfs_writeback_folio_done(inode, folio, ret);
return ret;
}
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 0a9b72685f98..1479583fbb62 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -9,6 +9,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
static unsigned int grace_net_id;
static DEFINE_SPINLOCK(grace_lock);
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
deleted file mode 100644
index 76bee0a0d308..000000000000
--- a/fs/nfsd/fault_inject.c
+++ /dev/null
@@ -1,142 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
- *
- * Uses debugfs to create fault injection points for client testing
- */
-
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/debugfs.h>
-#include <linux/module.h>
-#include <linux/nsproxy.h>
-#include <linux/sunrpc/addr.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-
-#include "state.h"
-#include "netns.h"
-
-struct nfsd_fault_inject_op {
- char *file;
- u64 (*get)(void);
- u64 (*set_val)(u64);
- u64 (*set_clnt)(struct sockaddr_storage *, size_t);
-};
-
-static struct dentry *debug_dir;
-
-static ssize_t fault_inject_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- static u64 val;
- char read_buf[25];
- size_t size;
- loff_t pos = *ppos;
- struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
-
- if (!pos)
- val = op->get();
- size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
-
- return simple_read_from_buffer(buf, len, ppos, read_buf, size);
-}
-
-static ssize_t fault_inject_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
-{
- char write_buf[INET6_ADDRSTRLEN];
- size_t size = min(sizeof(write_buf) - 1, len);
- struct net *net = current->nsproxy->net_ns;
- struct sockaddr_storage sa;
- struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
- u64 val;
- char *nl;
-
- if (copy_from_user(write_buf, buf, size))
- return -EFAULT;
- write_buf[size] = '\0';
-
- /* Deal with any embedded newlines in the string */
- nl = strchr(write_buf, '\n');
- if (nl) {
- size = nl - write_buf;
- *nl = '\0';
- }
-
- size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
- if (size > 0) {
- val = op->set_clnt(&sa, size);
- if (val)
- pr_info("NFSD [%s]: Client %s had %llu state object(s)\n",
- op->file, write_buf, val);
- } else {
- val = simple_strtoll(write_buf, NULL, 0);
- if (val == 0)
- pr_info("NFSD Fault Injection: %s (all)", op->file);
- else
- pr_info("NFSD Fault Injection: %s (n = %llu)",
- op->file, val);
- val = op->set_val(val);
- pr_info("NFSD: %s: found %llu", op->file, val);
- }
- return len; /* on success, claim we got the whole input */
-}
-
-static const struct file_operations fops_nfsd = {
- .owner = THIS_MODULE,
- .read = fault_inject_read,
- .write = fault_inject_write,
-};
-
-void nfsd_fault_inject_cleanup(void)
-{
- debugfs_remove_recursive(debug_dir);
-}
-
-static struct nfsd_fault_inject_op inject_ops[] = {
- {
- .file = "forget_clients",
- .get = nfsd_inject_print_clients,
- .set_val = nfsd_inject_forget_clients,
- .set_clnt = nfsd_inject_forget_client,
- },
- {
- .file = "forget_locks",
- .get = nfsd_inject_print_locks,
- .set_val = nfsd_inject_forget_locks,
- .set_clnt = nfsd_inject_forget_client_locks,
- },
- {
- .file = "forget_openowners",
- .get = nfsd_inject_print_openowners,
- .set_val = nfsd_inject_forget_openowners,
- .set_clnt = nfsd_inject_forget_client_openowners,
- },
- {
- .file = "forget_delegations",
- .get = nfsd_inject_print_delegations,
- .set_val = nfsd_inject_forget_delegations,
- .set_clnt = nfsd_inject_forget_client_delegations,
- },
- {
- .file = "recall_delegations",
- .get = nfsd_inject_print_delegations,
- .set_val = nfsd_inject_recall_delegations,
- .set_clnt = nfsd_inject_recall_client_delegations,
- },
-};
-
-void nfsd_fault_inject_init(void)
-{
- unsigned int i;
- struct nfsd_fault_inject_op *op;
- umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-
- debug_dir = debugfs_create_dir("nfsd", NULL);
-
- for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
- op = &inject_ops[i];
- debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd);
- }
-}
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 45b2c9e3f636..6e8712bd7c99 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -331,37 +331,27 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
return nf;
}
+/**
+ * nfsd_file_check_write_error - check for writeback errors on a file
+ * @nf: nfsd_file to check for writeback errors
+ *
+ * Check whether a nfsd_file has an unseen error. Reset the write
+ * verifier if so.
+ */
static void
-nfsd_file_fsync(struct nfsd_file *nf)
-{
- struct file *file = nf->nf_file;
- int ret;
-
- if (!file || !(file->f_mode & FMODE_WRITE))
- return;
- ret = vfs_fsync(file, 1);
- trace_nfsd_file_fsync(nf, ret);
- if (ret)
- nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
-}
-
-static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
struct file *file = nf->nf_file;
- if (!file || !(file->f_mode & FMODE_WRITE))
- return 0;
- return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+ if ((file->f_mode & FMODE_WRITE) &&
+ filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}
static void
nfsd_file_hash_remove(struct nfsd_file *nf)
{
trace_nfsd_file_unhash(nf);
-
- if (nfsd_file_check_write_error(nf))
- nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
nfsd_file_rhash_params);
}
@@ -387,23 +377,12 @@ nfsd_file_free(struct nfsd_file *nf)
this_cpu_add(nfsd_file_total_age, age);
nfsd_file_unhash(nf);
-
- /*
- * We call fsync here in order to catch writeback errors. It's not
- * strictly required by the protocol, but an nfsd_file could get
- * evicted from the cache before a COMMIT comes in. If another
- * task were to open that file in the interim and scrape the error,
- * then the client may never see it. By calling fsync here, we ensure
- * that writeback happens before the entry is freed, and that any
- * errors reported result in the write verifier changing.
- */
- nfsd_file_fsync(nf);
-
if (nf->nf_mark)
nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) {
get_file(nf->nf_file);
filp_close(nf->nf_file, NULL);
+ nfsd_file_check_write_error(nf);
fput(nf->nf_file);
}
@@ -452,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf)
struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
- if (likely(refcount_inc_not_zero(&nf->nf_ref)))
+ if (nf && refcount_inc_not_zero(&nf->nf_ref))
return nf;
return NULL;
}
@@ -662,6 +641,39 @@ static struct shrinker nfsd_file_shrinker = {
};
/**
+ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
+ * @nf: nfsd_file to attempt to queue
+ * @dispose: private list to queue successfully-put objects
+ *
+ * Unhash an nfsd_file, try to get a reference to it, and then put that
+ * reference. If it's the last reference, queue it to the dispose list.
+ */
+static void
+nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ __must_hold(RCU)
+{
+ int decrement = 1;
+
+ /* If we raced with someone else unhashing, ignore it */
+ if (!nfsd_file_unhash(nf))
+ return;
+
+ /* If we can't get a reference, ignore it */
+ if (!nfsd_file_get(nf))
+ return;
+
+ /* Extra decrement if we remove from the LRU */
+ if (nfsd_file_lru_remove(nf))
+ ++decrement;
+
+ /* If refcount goes to 0, then put on the dispose list */
+ if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+ list_add(&nf->nf_lru, dispose);
+ trace_nfsd_file_closing(nf);
+ }
+}
+
+/**
* nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
* @inode: inode on which to close out nfsd_files
* @dispose: list on which to gather nfsd_files to close out
@@ -688,30 +700,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
rcu_read_lock();
do {
- int decrement = 1;
-
nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
nfsd_file_rhash_params);
if (!nf)
break;
-
- /* If we raced with someone else unhashing, ignore it */
- if (!nfsd_file_unhash(nf))
- continue;
-
- /* If we can't get a reference, ignore it */
- if (!nfsd_file_get(nf))
- continue;
-
- /* Extra decrement if we remove from the LRU */
- if (nfsd_file_lru_remove(nf))
- ++decrement;
-
- /* If refcount goes to 0, then put on the dispose list */
- if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
- list_add(&nf->nf_lru, dispose);
- trace_nfsd_file_closing(nf);
- }
+ nfsd_file_cond_queue(nf, dispose);
} while (1);
rcu_read_unlock();
}
@@ -928,11 +921,8 @@ __nfsd_file_cache_purge(struct net *net)
nf = rhashtable_walk_next(&iter);
while (!IS_ERR_OR_NULL(nf)) {
- if (!net || nf->nf_net == net) {
- nfsd_file_unhash(nf);
- nfsd_file_lru_remove(nf);
- list_add(&nf->nf_lru, &dispose);
- }
+ if (!net || nf->nf_net == net)
+ nfsd_file_cond_queue(nf, &dispose);
nf = rhashtable_walk_next(&iter);
}
@@ -1071,8 +1061,8 @@ nfsd_file_is_cached(struct inode *inode)
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf,
- bool open, bool want_gc)
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf, bool want_gc)
{
struct nfsd_file_lookup_key key = {
.type = NFSD_FILE_KEY_FULL,
@@ -1096,8 +1086,7 @@ retry:
rcu_read_lock();
nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
nfsd_file_rhash_params);
- if (nf)
- nf = nfsd_file_get(nf);
+ nf = nfsd_file_get(nf);
rcu_read_unlock();
if (nf) {
@@ -1147,8 +1136,8 @@ wait_for_construction:
status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
if (status == nfs_ok) {
- if (open)
- this_cpu_inc(nfsd_file_acquisitions);
+ this_cpu_inc(nfsd_file_acquisitions);
+ nfsd_file_check_write_error(nf);
*pnf = nf;
} else {
if (refcount_dec_and_test(&nf->nf_ref))
@@ -1158,20 +1147,23 @@ out:
out_status:
put_cred(key.cred);
- if (open)
- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
return status;
open_file:
trace_nfsd_file_alloc(nf);
nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
if (nf->nf_mark) {
- if (open) {
+ if (file) {
+ get_file(file);
+ nf->nf_file = file;
+ status = nfs_ok;
+ trace_nfsd_file_opened(nf, status);
+ } else {
status = nfsd_open_verified(rqstp, fhp, may_flags,
&nf->nf_file);
trace_nfsd_file_open(nf, status);
- } else
- status = nfs_ok;
+ }
} else
status = nfserr_jukebox;
/*
@@ -1207,7 +1199,7 @@ __be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
}
/**
@@ -1228,28 +1220,30 @@ __be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
}
/**
- * nfsd_file_create - Get a struct nfsd_file, do not open
+ * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
* @rqstp: the RPC transaction being executed
* @fhp: the NFS filehandle of the file just created
* @may_flags: NFSD_MAY_ settings for the file
+ * @file: cached, already-open file (may be NULL)
* @pnf: OUT: new or found "struct nfsd_file" object
*
- * The nfsd_file_object returned by this API is reference-counted
- * but not garbage-collected. The object is released immediately
- * one RCU grace period after the final nfsd_file_put().
+ * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
*
* Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
* network byte order is returned.
*/
__be32
-nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf)
+nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
+ return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
}
/*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index b7efb2c3ddb1..41516a4263ea 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
-__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **nfp);
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..ec49b200b797 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -10,6 +10,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/filelock.h>
#include <linux/percpu_counter.h>
#include <linux/siphash.h>
@@ -195,7 +196,7 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1457f59f447a..12b2b9bc07bf 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -113,11 +113,11 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
inode_lock(inode);
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS,
argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT,
argp->acl_default);
if (error)
goto out_drop_lock;
@@ -377,10 +377,11 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
},
};
-static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)]);
const struct svc_version nfsd_acl_version2 = {
.vs_vers = 2,
- .vs_nproc = 5,
+ .vs_nproc = ARRAY_SIZE(nfsd_acl_procedures2),
.vs_proc = nfsd_acl_procedures2,
.vs_count = nfsd_acl_count2,
.vs_dispatch = nfsd_dispatch,
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 647108138e8a..73adca47d373 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -103,11 +103,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
inode_lock(inode);
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS,
argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT,
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT,
argp->acl_default);
out_drop_lock:
@@ -266,10 +266,11 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
},
};
-static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)]);
const struct svc_version nfsd_acl_version3 = {
.vs_vers = 3,
- .vs_nproc = 3,
+ .vs_nproc = ARRAY_SIZE(nfsd_acl_procedures3),
.vs_proc = nfsd_acl_procedures3,
.vs_count = nfsd_acl_count3,
.vs_dispatch = nfsd_dispatch,
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index d01b29aba662..e6bb8eeb5bc2 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -320,7 +320,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode &= ~current_umask();
fh_fill_pre_attrs(fhp);
- host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
goto out;
@@ -1064,10 +1064,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
},
};
-static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_count3[ARRAY_SIZE(nfsd_procedures3)]);
const struct svc_version nfsd_version3 = {
.vs_vers = 3,
- .vs_nproc = 22,
+ .vs_nproc = ARRAY_SIZE(nfsd_procedures3),
.vs_proc = nfsd_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_count = nfsd_count3,
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3564d1c6f610..e8a80052cb1b 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -323,11 +323,11 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
if (ls->ls_recalled)
goto out_unlock;
- ls->ls_recalled = true;
- atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
if (list_empty(&ls->ls_layouts))
goto out_unlock;
+ ls->ls_recalled = true;
+ atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
refcount_inc(&ls->ls_stid.sc_count);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bd880d55f565..5ae670807449 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
@@ -1214,8 +1214,10 @@ out:
return status;
out_put_dst:
nfsd_file_put(*dst);
+ *dst = NULL;
out_put_src:
nfsd_file_put(*src);
+ *src = NULL;
goto out;
}
@@ -1293,15 +1295,15 @@ extern void nfs_sb_deactive(struct super_block *sb);
* setup a work entry in the ssc delayed unmount list.
*/
static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
- struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt)
+ struct nfsd4_ssc_umount_item **nsui)
{
struct nfsd4_ssc_umount_item *ni = NULL;
struct nfsd4_ssc_umount_item *work = NULL;
struct nfsd4_ssc_umount_item *tmp;
DEFINE_WAIT(wait);
+ __be32 status = 0;
- *ss_mnt = NULL;
- *retwork = NULL;
+ *nsui = NULL;
work = kzalloc(sizeof(*work), GFP_KERNEL);
try_again:
spin_lock(&nn->nfsd_ssc_lock);
@@ -1318,18 +1320,19 @@ try_again:
/* allow 20secs for mount/unmount for now - revisit */
if (signal_pending(current) ||
(schedule_timeout(20*HZ) == 0)) {
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
return nfserr_eagain;
}
finish_wait(&nn->nfsd_ssc_waitq, &wait);
goto try_again;
}
- *ss_mnt = ni->nsui_vfsmount;
+ *nsui = ni;
refcount_inc(&ni->nsui_refcnt);
spin_unlock(&nn->nfsd_ssc_lock);
kfree(work);
- /* return vfsmount in ss_mnt */
+ /* return vfsmount in (*nsui)->nsui_vfsmount */
return 0;
}
if (work) {
@@ -1337,31 +1340,32 @@ try_again:
refcount_set(&work->nsui_refcnt, 2);
work->nsui_busy = true;
list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
- *retwork = work;
- }
+ *nsui = work;
+ } else
+ status = nfserr_resource;
spin_unlock(&nn->nfsd_ssc_lock);
- return 0;
+ return status;
}
-static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn,
- struct nfsd4_ssc_umount_item *work, struct vfsmount *ss_mnt)
+static void nfsd4_ssc_update_dul(struct nfsd_net *nn,
+ struct nfsd4_ssc_umount_item *nsui,
+ struct vfsmount *ss_mnt)
{
- /* set nsui_vfsmount, clear busy flag and wakeup waiters */
spin_lock(&nn->nfsd_ssc_lock);
- work->nsui_vfsmount = ss_mnt;
- work->nsui_busy = false;
+ nsui->nsui_vfsmount = ss_mnt;
+ nsui->nsui_busy = false;
wake_up_all(&nn->nfsd_ssc_waitq);
spin_unlock(&nn->nfsd_ssc_lock);
}
-static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn,
- struct nfsd4_ssc_umount_item *work)
+static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn,
+ struct nfsd4_ssc_umount_item *nsui)
{
spin_lock(&nn->nfsd_ssc_lock);
- list_del(&work->nsui_list);
+ list_del(&nsui->nsui_list);
wake_up_all(&nn->nfsd_ssc_waitq);
spin_unlock(&nn->nfsd_ssc_lock);
- kfree(work);
+ kfree(nsui);
}
/*
@@ -1369,7 +1373,7 @@ static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn,
*/
static __be32
nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
- struct vfsmount **mount)
+ struct nfsd4_ssc_umount_item **nsui)
{
struct file_system_type *type;
struct vfsmount *ss_mnt;
@@ -1380,7 +1384,6 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
char *ipaddr, *dev_name, *raw_data;
int len, raw_len;
__be32 status = nfserr_inval;
- struct nfsd4_ssc_umount_item *work = NULL;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
naddr = &nss->u.nl4_addr;
@@ -1388,6 +1391,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
naddr->addr_len,
(struct sockaddr *)&tmp_addr,
sizeof(tmp_addr));
+ *nsui = NULL;
if (tmp_addrlen == 0)
goto out_err;
@@ -1430,10 +1434,10 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
goto out_free_rawdata;
snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
- status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt);
+ status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
if (status)
goto out_free_devname;
- if (ss_mnt)
+ if ((*nsui)->nsui_vfsmount)
goto out_done;
/* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
@@ -1441,15 +1445,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
module_put(type->owner);
if (IS_ERR(ss_mnt)) {
status = nfserr_nodev;
- if (work)
- nfsd4_ssc_cancel_dul_work(nn, work);
+ nfsd4_ssc_cancel_dul(nn, *nsui);
goto out_free_devname;
}
- if (work)
- nfsd4_ssc_update_dul_work(nn, work, ss_mnt);
+ nfsd4_ssc_update_dul(nn, *nsui, ss_mnt);
out_done:
status = 0;
- *mount = ss_mnt;
out_free_devname:
kfree(dev_name);
@@ -1473,7 +1474,7 @@ out_err:
static __be32
nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
- struct nfsd4_copy *copy, struct vfsmount **mount)
+ struct nfsd4_copy *copy)
{
struct svc_fh *s_fh = NULL;
stateid_t *s_stid = &copy->cp_src_stateid;
@@ -1486,7 +1487,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
if (status)
goto out;
- status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount);
+ status = nfsd4_interssc_connect(copy->cp_src, rqstp, &copy->ss_nsui);
if (status)
goto out;
@@ -1504,45 +1505,26 @@ out:
}
static void
-nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
+nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
struct nfsd_file *dst)
{
- bool found = false;
- long timeout;
- struct nfsd4_ssc_umount_item *tmp;
- struct nfsd4_ssc_umount_item *ni = NULL;
struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
+ long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
nfs42_ssc_close(filp);
- nfsd_file_put(dst);
fput(filp);
- if (!nn) {
- mntput(ss_mnt);
- return;
- }
spin_lock(&nn->nfsd_ssc_lock);
- timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
- list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
- if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
- list_del(&ni->nsui_list);
- /*
- * vfsmount can be shared by multiple exports,
- * decrement refcnt. If the count drops to 1 it
- * will be unmounted when nsui_expire expires.
- */
- refcount_dec(&ni->nsui_refcnt);
- ni->nsui_expire = jiffies + timeout;
- list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list);
- found = true;
- break;
- }
- }
+ list_del(&nsui->nsui_list);
+ /*
+ * vfsmount can be shared by multiple exports,
+ * decrement refcnt. If the count drops to 1 it
+ * will be unmounted when nsui_expire expires.
+ */
+ refcount_dec(&nsui->nsui_refcnt);
+ nsui->nsui_expire = jiffies + timeout;
+ list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list);
spin_unlock(&nn->nfsd_ssc_lock);
- if (!found) {
- mntput(ss_mnt);
- return;
- }
}
#else /* CONFIG_NFSD_V4_2_INTER_SSC */
@@ -1550,15 +1532,13 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
static __be32
nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
- struct nfsd4_copy *copy,
- struct vfsmount **mount)
+ struct nfsd4_copy *copy)
{
- *mount = NULL;
return nfserr_inval;
}
static void
-nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
+nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
struct nfsd_file *dst)
{
}
@@ -1581,13 +1561,6 @@ nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
&copy->nf_dst);
}
-static void
-nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
-{
- nfsd_file_put(src);
- nfsd_file_put(dst);
-}
-
static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
{
struct nfsd4_cb_offload *cbo =
@@ -1699,18 +1672,27 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server));
memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
- dst->ss_mnt = src->ss_mnt;
+ dst->ss_nsui = src->ss_nsui;
+}
+
+static void release_copy_files(struct nfsd4_copy *copy)
+{
+ if (copy->nf_src)
+ nfsd_file_put(copy->nf_src);
+ if (copy->nf_dst)
+ nfsd_file_put(copy->nf_dst);
}
static void cleanup_async_copy(struct nfsd4_copy *copy)
{
nfs4_free_copy_state(copy);
- nfsd_file_put(copy->nf_dst);
- if (!nfsd4_ssc_is_inter(copy))
- nfsd_file_put(copy->nf_src);
- spin_lock(&copy->cp_clp->async_lock);
- list_del(&copy->copies);
- spin_unlock(&copy->cp_clp->async_lock);
+ release_copy_files(copy);
+ if (copy->cp_clp) {
+ spin_lock(&copy->cp_clp->async_lock);
+ if (!list_empty(&copy->copies))
+ list_del_init(&copy->copies);
+ spin_unlock(&copy->cp_clp->async_lock);
+ }
nfs4_put_copy(copy);
}
@@ -1748,8 +1730,8 @@ static int nfsd4_do_async_copy(void *data)
if (nfsd4_ssc_is_inter(copy)) {
struct file *filp;
- filp = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
- &copy->stateid);
+ filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount,
+ &copy->c_fh, &copy->stateid);
if (IS_ERR(filp)) {
switch (PTR_ERR(filp)) {
case -EBADF:
@@ -1763,11 +1745,10 @@ static int nfsd4_do_async_copy(void *data)
}
nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
false);
- nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst);
+ nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst);
} else {
nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
copy->nf_dst->nf_file, false);
- nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
}
do_callback:
@@ -1789,8 +1770,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_notsupp;
goto out;
}
- status = nfsd4_setup_inter_ssc(rqstp, cstate, copy,
- &copy->ss_mnt);
+ status = nfsd4_setup_inter_ssc(rqstp, cstate, copy);
if (status)
return nfserr_offload_denied;
} else {
@@ -1809,12 +1789,13 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!async_copy)
goto out_err;
+ INIT_LIST_HEAD(&async_copy->copies);
+ refcount_set(&async_copy->refcount, 1);
async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
if (!async_copy->cp_src)
goto out_err;
if (!nfs4_init_copy_state(nn, copy))
goto out_err;
- refcount_set(&async_copy->refcount, 1);
memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
sizeof(copy->cp_res.cb_stateid));
dup_copy_fields(copy, async_copy);
@@ -1831,38 +1812,53 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
} else {
status = nfsd4_do_copy(copy, copy->nf_src->nf_file,
copy->nf_dst->nf_file, true);
- nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
}
out:
+ release_copy_files(copy);
return status;
out_err:
+ if (nfsd4_ssc_is_inter(copy)) {
+ /*
+ * Source's vfsmount of inter-copy will be unmounted
+ * by the laundromat. Use copy instead of async_copy
+ * since async_copy->ss_nsui might not be set yet.
+ */
+ refcount_dec(&copy->ss_nsui->nsui_refcnt);
+ }
if (async_copy)
cleanup_async_copy(async_copy);
status = nfserrno(-ENOMEM);
- /*
- * source's vfsmount of inter-copy will be unmounted
- * by the laundromat
- */
goto out;
}
-struct nfsd4_copy *
-find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
+static struct nfsd4_copy *
+find_async_copy_locked(struct nfs4_client *clp, stateid_t *stateid)
{
struct nfsd4_copy *copy;
- spin_lock(&clp->async_lock);
+ lockdep_assert_held(&clp->async_lock);
+
list_for_each_entry(copy, &clp->async_copies, copies) {
if (memcmp(&copy->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE))
continue;
- refcount_inc(&copy->refcount);
- spin_unlock(&clp->async_lock);
return copy;
}
- spin_unlock(&clp->async_lock);
return NULL;
}
+static struct nfsd4_copy *
+find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
+{
+ struct nfsd4_copy *copy;
+
+ spin_lock(&clp->async_lock);
+ copy = find_async_copy_locked(clp, stateid);
+ if (copy)
+ refcount_inc(&copy->refcount);
+ spin_unlock(&clp->async_lock);
+ return copy;
+}
+
static __be32
nfsd4_offload_cancel(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
@@ -1947,22 +1943,24 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd_file_put(nf);
return status;
}
+
static __be32
nfsd4_offload_status(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_offload_status *os = &u->offload_status;
- __be32 status = 0;
+ __be32 status = nfs_ok;
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
- copy = find_async_copy(clp, &os->stateid);
- if (copy) {
+ spin_lock(&clp->async_lock);
+ copy = find_async_copy_locked(clp, &os->stateid);
+ if (copy)
os->count = copy->cp_res.wr_bytes_written;
- nfs4_put_copy(copy);
- } else
+ else
status = nfserr_bad_stateid;
+ spin_unlock(&clp->async_lock);
return status;
}
@@ -2607,12 +2605,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
cstate->minorversion = args->minorversion;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
-
/*
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
- __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -2734,7 +2731,7 @@ encode_op:
out:
cstate->status = status;
/* Reset deferral mechanism for RPC deferrals */
- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
return rpc_success;
}
@@ -3619,12 +3616,13 @@ static const struct svc_procedure nfsd_procedures4[2] = {
},
};
-static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_count4[ARRAY_SIZE(nfsd_procedures4)]);
const struct svc_version nfsd_version4 = {
.vs_vers = 4,
- .vs_nproc = 2,
+ .vs_nproc = ARRAY_SIZE(nfsd_procedures4),
.vs_proc = nfsd_procedures4,
- .vs_count = nfsd_count3,
+ .vs_count = nfsd_count4,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS4_SVC_XDRSIZE,
.vs_rpcb_optnl = true,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 78b8cd9651d5..3509e73abe1f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -233,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU);
+ status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
out_put:
dput(dentry);
out_unlock:
@@ -353,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
status = -ENOENT;
if (d_really_is_negative(dentry))
goto out;
- status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry);
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry);
out:
dput(dentry);
out_unlock:
@@ -443,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
if (nfs4_has_reclaimed_state(name, nn))
goto out_free;
- status = vfs_rmdir(&init_user_ns, d_inode(parent), child);
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child);
if (status)
printk("failed to remove client recovery directory %pd\n",
child);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b2ee535ade8..6e61fa3acaf1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -600,23 +600,15 @@ put_nfs4_file(struct nfs4_file *fi)
}
static struct nfsd_file *
-__nfs4_get_fd(struct nfs4_file *f, int oflag)
-{
- if (f->fi_fds[oflag])
- return nfsd_file_get(f->fi_fds[oflag]);
- return NULL;
-}
-
-static struct nfsd_file *
find_writeable_file_locked(struct nfs4_file *f)
{
struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
- ret = __nfs4_get_fd(f, O_WRONLY);
+ ret = nfsd_file_get(f->fi_fds[O_WRONLY]);
if (!ret)
- ret = __nfs4_get_fd(f, O_RDWR);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
return ret;
}
@@ -639,9 +631,9 @@ find_readable_file_locked(struct nfs4_file *f)
lockdep_assert_held(&f->fi_lock);
- ret = __nfs4_get_fd(f, O_RDONLY);
+ ret = nfsd_file_get(f->fi_fds[O_RDONLY]);
if (!ret)
- ret = __nfs4_get_fd(f, O_RDWR);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
return ret;
}
@@ -665,11 +657,11 @@ find_any_file(struct nfs4_file *f)
if (!f)
return NULL;
spin_lock(&f->fi_lock);
- ret = __nfs4_get_fd(f, O_RDWR);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
if (!ret) {
- ret = __nfs4_get_fd(f, O_WRONLY);
+ ret = nfsd_file_get(f->fi_fds[O_WRONLY]);
if (!ret)
- ret = __nfs4_get_fd(f, O_RDONLY);
+ ret = nfsd_file_get(f->fi_fds[O_RDONLY]);
}
spin_unlock(&f->fi_lock);
return ret;
@@ -688,15 +680,6 @@ static struct nfsd_file *find_any_file_locked(struct nfs4_file *f)
return NULL;
}
-static struct nfsd_file *find_deleg_file_locked(struct nfs4_file *f)
-{
- lockdep_assert_held(&f->fi_lock);
-
- if (f->fi_deleg_file)
- return f->fi_deleg_file;
- return NULL;
-}
-
static atomic_long_t num_delegations;
unsigned long max_delegations;
@@ -992,7 +975,6 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
- stid->cs_type = cs_type;
idr_preload(GFP_KERNEL);
spin_lock(&nn->s2s_cp_lock);
@@ -1003,6 +985,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
idr_preload_end();
if (new_id < 0)
return 0;
+ stid->cs_type = cs_type;
return 1;
}
@@ -1036,7 +1019,8 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy)
{
struct nfsd_net *nn;
- WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID);
+ if (copy->cp_stateid.cs_type != NFS4_COPY_STID)
+ return;
nn = net_generic(copy->cp_clp->net, nfsd_net_id);
spin_lock(&nn->s2s_cp_lock);
idr_remove(&nn->s2s_cp_stateids,
@@ -2705,7 +2689,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
ds = delegstateid(st);
nf = st->sc_file;
spin_lock(&nf->fi_lock);
- file = find_deleg_file_locked(nf);
+ file = nf->fi_deleg_file;
if (!file)
goto out;
@@ -4411,7 +4395,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
if (!count)
count = atomic_long_read(&num_delegations);
if (count)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
return (unsigned long)count;
}
@@ -4421,7 +4405,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}
-int
+void
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
@@ -4443,16 +4427,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
atomic_set(&nn->nfsd_courtesy_clients, 0);
- nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
- nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
-}
-
-void
-nfsd4_leases_net_shutdown(struct nfsd_net *nn)
-{
- unregister_shrinker(&nn->nfsd_client_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -5262,18 +5236,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- if (!open->op_filp) {
- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- } else {
- status = nfsd_file_create(rqstp, cur_fh, access, &nf);
- if (status != nfs_ok)
- goto out_put_access;
- nf->nf_file = open->op_filp;
- open->op_filp = NULL;
- trace_nfsd_file_create(rqstp, access, nf);
- }
+ status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
+ open->op_filp, &nf);
+ if (status != nfs_ok)
+ goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
@@ -5316,16 +5282,17 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
/* test and set deny mode */
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny);
- if (status == nfs_ok) {
- if (status != nfserr_share_denied) {
- set_deny(open->op_share_deny, stp);
- fp->fi_share_deny |=
- (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
- } else {
- if (nfs4_resolve_deny_conflicts_locked(fp, false,
- stp, open->op_share_deny, false))
- status = nfserr_jukebox;
- }
+ switch (status) {
+ case nfs_ok:
+ set_deny(open->op_share_deny, stp);
+ fp->fi_share_deny |=
+ (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+ break;
+ case nfserr_share_denied:
+ if (nfs4_resolve_deny_conflicts_locked(fp, false,
+ stp, open->op_share_deny, false))
+ status = nfserr_jukebox;
+ break;
}
spin_unlock(&fp->fi_lock);
@@ -5374,7 +5341,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
{
struct nfs4_ol_stateid *st;
struct file *f = fp->fi_deleg_file->nf_file;
- struct inode *ino = locks_inode(f);
+ struct inode *ino = file_inode(f);
int writes;
writes = atomic_read(&ino->i_writecount);
@@ -5456,6 +5423,23 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
return 0;
}
+/*
+ * We avoid breaking delegations held by a client due to its own activity, but
+ * clearing setuid/setgid bits on a write is an implicit activity and the client
+ * may not notice and continue using the old mode. Avoid giving out a delegation
+ * on setuid/setgid files when the client is requesting an open for write.
+ */
+static int
+nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
+{
+ struct inode *inode = file_inode(nf->nf_file);
+
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) &&
+ (inode->i_mode & (S_ISUID|S_ISGID)))
+ return -EAGAIN;
+ return 0;
+}
+
static struct nfs4_delegation *
nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent)
@@ -5489,6 +5473,8 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN;
+ else if (nfsd4_verify_setuid_write(open, nf))
+ status = -EAGAIN;
else if (!fp->fi_deleg_file) {
fp->fi_deleg_file = nf;
/* increment early to prevent fi_deleg_file from being
@@ -5529,6 +5515,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (status)
goto out_unlock;
+ /*
+ * Now that the deleg is set, check again to ensure that nothing
+ * raced in and changed the mode while we weren't lookng.
+ */
+ status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
+ if (status)
+ goto out_unlock;
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (fp->fi_had_conflict)
@@ -6243,8 +6237,7 @@ deleg_reaper(struct nfsd_net *nn)
static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
nfsd_shrinker_work);
courtesy_client_reaper(nn);
@@ -6425,23 +6418,26 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
static struct nfsd_file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
+ struct nfsd_file *ret = NULL;
+
if (!s)
return NULL;
switch (s->sc_type) {
case NFS4_DELEG_STID:
- if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
- return NULL;
- return nfsd_file_get(s->sc_file->fi_deleg_file);
+ spin_lock(&s->sc_file->fi_lock);
+ ret = nfsd_file_get(s->sc_file->fi_deleg_file);
+ spin_unlock(&s->sc_file->fi_lock);
+ break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
if (flags & RD_STATE)
- return find_readable_file(s->sc_file);
+ ret = find_readable_file(s->sc_file);
else
- return find_writeable_file(s->sc_file);
+ ret = find_writeable_file(s->sc_file);
}
- return NULL;
+ return ret;
}
static __be32
@@ -6566,8 +6562,19 @@ void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
spin_unlock(&nn->s2s_cp_lock);
}
-/*
- * Checks for stateid operations
+/**
+ * nfs4_preprocess_stateid_op - find and prep stateid for an operation
+ * @rqstp: incoming request from client
+ * @cstate: current compound state
+ * @fhp: filehandle associated with requested stateid
+ * @stateid: stateid (provided by client)
+ * @flags: flags describing type of operation to be done
+ * @nfp: optional nfsd_file return pointer (may be NULL)
+ * @cstid: optional returned nfs4_stid pointer (may be NULL)
+ *
+ * Given info from the client, look up a nfs4_stid for the operation. On
+ * success, it returns a reference to the nfs4_stid and/or the nfsd_file
+ * associated with it.
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
@@ -6756,8 +6763,18 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
return status;
}
-/*
- * Checks for sequence id mutating operations.
+/**
+ * nfs4_preprocess_seqid_op - find and prep an ol_stateid for a seqid-morphing op
+ * @cstate: compund state
+ * @seqid: seqid (provided by client)
+ * @stateid: stateid (provided by client)
+ * @typemask: mask of allowable types for this operation
+ * @stpp: return pointer for the stateid found
+ * @nn: net namespace for request
+ *
+ * Given a stateid+seqid from a client, look up an nfs4_ol_stateid and
+ * return it in @stpp. On a nfs_ok return, the returned stateid will
+ * have its st_mutex locked.
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
@@ -7828,7 +7845,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
return status;
}
- inode = locks_inode(nf->nf_file);
+ inode = file_inode(nf->nf_file);
flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
@@ -8074,11 +8091,20 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);
+ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+
+ if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"))
+ goto err_shrinker;
return 0;
+err_shrinker:
+ put_net(net);
+ kfree(nn->sessionid_hashtbl);
err_sessionid:
kfree(nn->unconf_id_hashtbl);
err_unconf_id:
@@ -8171,6 +8197,8 @@ nfs4_state_shutdown_net(struct net *net)
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -8190,7 +8218,6 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
- rhltable_destroy(&nfs4_file_rhltable);
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
nfsd4_ssc_shutdown_umount(nn);
#endif
@@ -8200,6 +8227,7 @@ void
nfs4_state_shutdown(void)
{
nfsd4_destroy_callback_queue();
+ rhltable_destroy(&nfs4_file_rhltable);
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2b4ae858c89b..e12e5a4ad502 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
- __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
return true;
}
@@ -2965,7 +2965,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out;
}
- err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ err = vfs_getattr(&path, &stat,
+ STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
+ AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
if (!(stat.result_mask & STATX_BTIME))
@@ -3629,6 +3631,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
case nfserr_noent:
xdr_truncate_encode(xdr, start_offset);
goto skip_entry;
+ case nfserr_jukebox:
+ /*
+ * The pseudoroot should only display dentries that lead to
+ * exports. If we get EJUKEBOX here, then we can't tell whether
+ * this entry should be included. Just fail the whole READDIR
+ * with NFS4ERR_DELAY in that case, and hope that the situation
+ * will resolve itself by the client's next attempt.
+ */
+ if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT)
+ goto fail;
+ fallthrough;
default:
/*
* If the client requested the RDATTR_ERROR attribute,
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 3e64a3d50a1c..041faa13b852 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -488,7 +488,7 @@ found_entry:
case RC_NOCACHE:
break;
case RC_REPLSTAT:
- svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
+ xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
rtn = RC_REPLY;
break;
case RC_REPLBUFF:
@@ -509,7 +509,7 @@ out_trace:
* nfsd_cache_update - Update an entry in the duplicate reply cache.
* @rqstp: svc_rqst with a finished Reply
* @cachetype: which cache to update
- * @statp: Reply's status code
+ * @statp: pointer to Reply's NFS status code, or NULL
*
* This is called from nfsd_dispatch when the procedure has been
* executed and the complete reply is in rqstp->rq_res.
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d1e581a60480..7b8f17ee5224 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -14,7 +14,6 @@
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/gss_api.h>
-#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/module.h>
#include <linux/fsnotify.h>
@@ -47,7 +46,6 @@ enum {
NFSD_MaxBlkSize,
NFSD_MaxConnections,
NFSD_Filecache,
- NFSD_SupportedEnctypes,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
* with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -187,16 +185,6 @@ static int export_features_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(export_features);
-#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
-static int supported_enctypes_show(struct seq_file *m, void *v)
-{
- seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(supported_enctypes);
-#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
-
static const struct file_operations pool_stats_operations = {
.open = nfsd_pool_stats_open,
.read = seq_read,
@@ -1150,6 +1138,9 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
break;
+ case S_IFLNK:
+ inode->i_op = &simple_symlink_inode_operations;
+ break;
default:
break;
}
@@ -1195,6 +1186,54 @@ out_err:
goto out;
}
+#if IS_ENABLED(CONFIG_SUNRPC_GSS)
+static int __nfsd_symlink(struct inode *dir, struct dentry *dentry,
+ umode_t mode, const char *content)
+{
+ struct inode *inode;
+
+ inode = nfsd_get_inode(dir->i_sb, mode);
+ if (!inode)
+ return -ENOMEM;
+
+ inode->i_link = (char *)content;
+ inode->i_size = strlen(content);
+
+ d_add(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_create(dir, dentry);
+ return 0;
+}
+
+/*
+ * @content is assumed to be a NUL-terminated string that lives
+ * longer than the symlink itself.
+ */
+static void nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
+{
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+ int ret;
+
+ inode_lock(dir);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ goto out;
+ ret = __nfsd_symlink(d_inode(parent), dentry, S_IFLNK | 0777, content);
+ if (ret)
+ dput(dentry);
+out:
+ inode_unlock(dir);
+}
+#else
+static inline void nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
+{
+}
+
+#endif
+
static void clear_ncl(struct inode *inode)
{
struct nfsdfs_client *ncl = inode->i_private;
@@ -1355,10 +1394,6 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
-#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
- [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes",
- &supported_enctypes_fops, S_IRUGO},
-#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
@@ -1371,6 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
if (ret)
return ret;
+ nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
+ "/proc/net/rpc/gss_krb5_enctypes");
dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
if (IS_ERR(dentry))
return PTR_ERR(dentry);
@@ -1457,21 +1494,12 @@ static __net_init int nfsd_init_net(struct net *net)
goto out_idmap_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
- retval = nfsd4_init_leases_net(nn);
- if (retval)
- goto out_drc_error;
- retval = nfsd_reply_cache_init(nn);
- if (retval)
- goto out_cache_error;
+ nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
return 0;
-out_cache_error:
- nfsd4_leases_net_shutdown(nn);
-out_drc_error:
- nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
@@ -1480,13 +1508,9 @@ out_export_error:
static __net_exit void nfsd_exit_net(struct net *net)
{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
- nfsd4_leases_net_shutdown(nn);
}
static struct pernet_operations nfsd_net_ops = {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 93b42ef9ed91..d88498f8b275 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -86,7 +86,7 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp,
* Function prototypes.
*/
int nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
-int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+int nfsd_dispatch(struct svc_rqst *rqstp);
int nfsd_nrthreads(struct net *);
int nfsd_nrpools(struct net *);
@@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void);
extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
#endif
-extern int nfsd4_init_leases_net(struct nfsd_net *nn);
-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
+extern void nfsd4_init_leases_net(struct nfsd_net *nn);
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
@@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
return 0;
}
-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
+static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
#define register_cld_notifier() 0
#define unregister_cld_notifier() do { } while(0)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 8c52b6c9d31a..ccd8485fee04 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -40,7 +40,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
/* make sure parents give x permission to user */
int err;
parent = dget_parent(tdentry);
- err = inode_permission(&init_user_ns,
+ err = inode_permission(&nop_mnt_idmap,
d_inode(parent), MAY_EXEC);
if (err < 0) {
dput(parent);
@@ -628,6 +628,10 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
stat.mtime = inode->i_mtime;
stat.ctime = inode->i_ctime;
stat.size = inode->i_size;
+ if (v4 && IS_I_VERSION(inode)) {
+ stat.change_cookie = inode_query_iversion(inode);
+ stat.result_mask |= STATX_CHANGE_COOKIE;
+ }
}
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -659,6 +663,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
if (err) {
fhp->fh_post_saved = false;
fhp->fh_post_attr.ctime = inode->i_ctime;
+ if (v4 && IS_I_VERSION(inode)) {
+ fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
+ fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
+ }
} else
fhp->fh_post_saved = true;
if (v4)
@@ -748,3 +756,37 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
return FSIDSOURCE_UUID;
return FSIDSOURCE_DEV;
}
+
+/*
+ * We could use i_version alone as the change attribute. However, i_version
+ * can go backwards on a regular file after an unclean shutdown. On its own
+ * that doesn't necessarily cause a problem, but if i_version goes backwards
+ * and then is incremented again it could reuse a value that was previously
+ * used before boot, and a client who queried the two values might incorrectly
+ * assume nothing changed.
+ *
+ * By using both ctime and the i_version counter we guarantee that as long as
+ * time doesn't go backwards we never reuse an old value. If the filesystem
+ * advertises STATX_ATTR_CHANGE_MONOTONIC, then this mitigation is not
+ * needed.
+ *
+ * We only need to do this for regular files as well. For directories, we
+ * assume that the new change attr is always logged to stable storage in some
+ * fashion before the results can be seen.
+ */
+u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode)
+{
+ u64 chattr;
+
+ if (stat->result_mask & STATX_CHANGE_COOKIE) {
+ chattr = stat->change_cookie;
+ if (S_ISREG(inode->i_mode) &&
+ !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
+ chattr += (u64)stat->ctime.tv_sec << 30;
+ chattr += stat->ctime.tv_nsec;
+ }
+ } else {
+ chattr = time_to_chattr(&stat->ctime);
+ }
+ return chattr;
+}
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 513e028b0bbe..4e0ecf0ae2cf 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -293,34 +293,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
fhp->fh_pre_saved = false;
}
-/*
- * We could use i_version alone as the change attribute. However,
- * i_version can go backwards after a reboot. On its own that doesn't
- * necessarily cause a problem, but if i_version goes backwards and then
- * is incremented again it could reuse a value that was previously used
- * before boot, and a client who queried the two values might
- * incorrectly assume nothing changed.
- *
- * By using both ctime and the i_version counter we guarantee that as
- * long as time doesn't go backwards we never reuse an old value.
- */
-static inline u64 nfsd4_change_attribute(struct kstat *stat,
- struct inode *inode)
-{
- if (inode->i_sb->s_export_op->fetch_iversion)
- return inode->i_sb->s_export_op->fetch_iversion(inode);
- else if (IS_I_VERSION(inode)) {
- u64 chattr;
-
- chattr = stat->ctime.tv_sec;
- chattr <<= 30;
- chattr += stat->ctime.tv_nsec;
- chattr += inode_query_iversion(inode);
- return chattr;
- } else
- return time_to_chattr(&stat->ctime);
-}
-
+u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode);
extern void fh_fill_pre_attrs(struct svc_fh *fhp);
extern void fh_fill_post_attrs(struct svc_fh *fhp);
extern void fh_fill_both_attrs(struct svc_fh *fhp);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a5570cf75f3f..c37195572fd0 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -93,7 +93,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
- setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) {
+ setattr_prepare(&nop_mnt_idmap, fhp->fh_dentry, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}
@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
return rpc_success;
}
@@ -838,11 +838,11 @@ static const struct svc_procedure nfsd_procedures2[18] = {
},
};
-
-static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_count2[ARRAY_SIZE(nfsd_procedures2)]);
const struct svc_version nfsd_version2 = {
.vs_vers = 2,
- .vs_nproc = 18,
+ .vs_nproc = ARRAY_SIZE(nfsd_procedures2),
.vs_proc = nfsd_procedures2,
.vs_count = nfsd_count2,
.vs_dispatch = nfsd_dispatch,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 56fba1cba3af..9c7b1ef5be40 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -363,7 +363,7 @@ void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
do {
read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
- memcpy(verf, nn->writeverf, sizeof(*verf));
+ memcpy(verf, nn->writeverf, sizeof(nn->writeverf));
} while (need_seqretry(&nn->writeverf_lock, seq));
done_seqretry(&nn->writeverf_lock, seq);
}
@@ -427,16 +427,23 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred)
ret = nfsd_file_cache_start_net(net);
if (ret)
goto out_lockd;
- ret = nfs4_state_start_net(net);
+
+ ret = nfsd_reply_cache_init(nn);
if (ret)
goto out_filecache;
+ ret = nfs4_state_start_net(net);
+ if (ret)
+ goto out_reply_cache;
+
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
nfsd4_ssc_init_umount_work(nn);
#endif
nn->nfsd_net_up = true;
return 0;
+out_reply_cache:
+ nfsd_reply_cache_shutdown(nn);
out_filecache:
nfsd_file_cache_shutdown_net(net);
out_lockd:
@@ -453,8 +460,9 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfsd_file_cache_shutdown_net(net);
nfs4_state_shutdown_net(net);
+ nfsd_reply_cache_shutdown(nn);
+ nfsd_file_cache_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
nn->lockd_up = false;
@@ -1022,7 +1030,6 @@ out:
/**
* nfsd_dispatch - Process an NFS or NFSACL Request
* @rqstp: incoming request
- * @statp: pointer to location of accept_stat field in RPC Reply buffer
*
* This RPC dispatcher integrates the NFS server's duplicate reply cache.
*
@@ -1030,9 +1037,10 @@ out:
* %0: Processing complete; do not send a Reply
* %1: Processing complete; send Reply in rqstp->rq_res
*/
-int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+int nfsd_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *proc = rqstp->rq_procinfo;
+ __be32 *statp = rqstp->rq_accept_statp;
/*
* Give the xdr decoder a chance to change this if it wants
@@ -1040,7 +1048,6 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
*/
rqstp->rq_cachetype = proc->pc_cachetype;
- svcxdr_init_decode(rqstp);
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
@@ -1053,12 +1060,6 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
goto out_dropit;
}
- /*
- * Need to grab the location to store the status, as
- * NFSv4 does some encoding while processing
- */
- svcxdr_init_encode(rqstp);
-
*statp = proc->pc_func(rqstp);
if (test_bit(RQ_DROPME, &rqstp->rq_flags))
goto out_update_drop;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e94634d30591..d49d3060ed4f 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -705,8 +705,6 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
void put_nfs4_file(struct nfs4_file *fi);
-extern struct nfsd4_copy *
-find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
struct nfs4_cpntf_state *cps);
extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c852ae8eaf37..4183819ea082 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire,
)
);
-TRACE_EVENT(nfsd_file_create,
- TP_PROTO(
- const struct svc_rqst *rqstp,
- unsigned int may_flags,
- const struct nfsd_file *nf
- ),
-
- TP_ARGS(rqstp, may_flags, nf),
-
- TP_STRUCT__entry(
- __field(const void *, nf_inode)
- __field(const void *, nf_file)
- __field(unsigned long, may_flags)
- __field(unsigned long, nf_flags)
- __field(unsigned long, nf_may)
- __field(unsigned int, nf_ref)
- __field(u32, xid)
- ),
-
- TP_fast_assign(
- __entry->nf_inode = nf->nf_inode;
- __entry->nf_file = nf->nf_file;
- __entry->may_flags = may_flags;
- __entry->nf_flags = nf->nf_flags;
- __entry->nf_may = nf->nf_may;
- __entry->nf_ref = refcount_read(&nf->nf_ref);
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
- ),
-
- TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
- __entry->xid, __entry->nf_inode,
- show_nfsd_may_flags(__entry->may_flags),
- __entry->nf_ref, show_nf_flags(__entry->nf_flags),
- show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
- )
-);
-
TRACE_EVENT(nfsd_file_insert_err,
TP_PROTO(
const struct svc_rqst *rqstp,
@@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err,
)
);
-TRACE_EVENT(nfsd_file_open,
- TP_PROTO(struct nfsd_file *nf, __be32 status),
+DECLARE_EVENT_CLASS(nfsd_file_open_class,
+ TP_PROTO(const struct nfsd_file *nf, __be32 status),
TP_ARGS(nf, status),
TP_STRUCT__entry(
__field(void *, nf_inode) /* cannot be dereferenced */
@@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open,
__entry->nf_file)
)
+#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \
+DEFINE_EVENT(nfsd_file_open_class, name, \
+ TP_PROTO( \
+ const struct nfsd_file *nf, \
+ __be32 status \
+ ), \
+ TP_ARGS(nf, status))
+
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
+
TRACE_EVENT(nfsd_file_is_cached,
TP_PROTO(
const struct inode *inode,
@@ -1228,37 +1202,6 @@ TRACE_EVENT(nfsd_file_close,
)
);
-TRACE_EVENT(nfsd_file_fsync,
- TP_PROTO(
- const struct nfsd_file *nf,
- int ret
- ),
- TP_ARGS(nf, ret),
- TP_STRUCT__entry(
- __field(void *, nf_inode)
- __field(int, nf_ref)
- __field(int, ret)
- __field(unsigned long, nf_flags)
- __field(unsigned char, nf_may)
- __field(struct file *, nf_file)
- ),
- TP_fast_assign(
- __entry->nf_inode = nf->nf_inode;
- __entry->nf_ref = refcount_read(&nf->nf_ref);
- __entry->ret = ret;
- __entry->nf_flags = nf->nf_flags;
- __entry->nf_may = nf->nf_may;
- __entry->nf_file = nf->nf_file;
- ),
- TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
- __entry->nf_inode,
- __entry->nf_ref,
- show_nf_flags(__entry->nf_flags),
- show_nfsd_may_flags(__entry->nf_may),
- __entry->nf_file, __entry->ret
- )
-);
-
#include "cache.h"
TRACE_DEFINE_ENUM(RC_DROPIT);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4c3a0d84043c..e7462b5e5f1e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -126,9 +126,13 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct dentry *dentry = *dpp;
struct path path = {.mnt = mntget(exp->ex_path.mnt),
.dentry = dget(dentry)};
+ unsigned int follow_flags = 0;
int err = 0;
- err = follow_down(&path);
+ if (exp->ex_flags & NFSEXP_CROSSMOUNT)
+ follow_flags = LOOKUP_AUTOMOUNT;
+
+ err = follow_down(&path, follow_flags);
if (err < 0)
goto out;
if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
@@ -223,7 +227,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
return 1;
if (nfsd4_is_junction(dentry))
return 1;
- if (d_mountpoint(dentry))
+ if (d_managed(dentry))
/*
* Might only be a mountpoint in a different namespace,
* but we need to check.
@@ -426,7 +430,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
if (iap->ia_size < 0)
return -EFBIG;
- host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
+ host_err = notify_change(&nop_mnt_idmap, dentry, &size_attr, NULL);
if (host_err)
return host_err;
iap->ia_valid &= ~ATTR_SIZE;
@@ -444,7 +448,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
return 0;
iap->ia_valid |= ATTR_CTIME;
- return notify_change(&init_user_ns, dentry, iap, NULL);
+ return notify_change(&nop_mnt_idmap, dentry, iap, NULL);
}
/**
@@ -542,12 +546,12 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
attr->na_labelerr = security_inode_setsecctx(dentry,
attr->na_seclabel->data, attr->na_seclabel->len);
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
- attr->na_aclerr = set_posix_acl(&init_user_ns,
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_ACCESS,
attr->na_pacl);
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
!attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
- attr->na_aclerr = set_posix_acl(&init_user_ns,
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_DEFAULT,
attr->na_dpacl);
inode_unlock(inode);
@@ -583,7 +587,7 @@ int nfsd4_is_junction(struct dentry *dentry)
return 0;
if (!(inode->i_mode & S_ISVTX))
return 0;
- if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME,
+ if (vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME,
NULL, 0) <= 0)
return 0;
return 1;
@@ -1363,12 +1367,13 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = 0;
switch (type) {
case S_IFREG:
- host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, dirp, dchild,
+ iap->ia_mode, true);
if (!host_err)
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode);
+ host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
if (!host_err && unlikely(d_unhashed(dchild))) {
struct dentry *d;
d = lookup_one_len(dchild->d_name.name,
@@ -1396,7 +1401,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- host_err = vfs_mknod(&init_user_ns, dirp, dchild,
+ host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild,
iap->ia_mode, rdev);
break;
default:
@@ -1557,7 +1562,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_drop_write;
}
fh_fill_pre_attrs(fhp);
- host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
+ host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
err = nfserrno(host_err);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
if (!err)
@@ -1625,7 +1630,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (d_really_is_negative(dold))
goto out_dput;
fh_fill_pre_attrs(ffhp);
- host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL);
+ host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
if (!host_err) {
@@ -1745,10 +1750,10 @@ retry:
goto out_dput_old;
} else {
struct renamedata rd = {
- .old_mnt_userns = &init_user_ns,
+ .old_mnt_idmap = &nop_mnt_idmap,
.old_dir = fdir,
.old_dentry = odentry,
- .new_mnt_userns = &init_user_ns,
+ .new_mnt_idmap = &nop_mnt_idmap,
.new_dir = tdir,
.new_dentry = ndentry,
};
@@ -1850,14 +1855,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
nfsd_close_cached_files(rdentry);
for (retries = 1;;) {
- host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
+ host_err = vfs_unlink(&nop_mnt_idmap, dirp, rdentry, NULL);
if (host_err != -EAGAIN || !retries--)
break;
if (!nfsd_wait_for_delegreturn(rqstp, rinode))
break;
}
} else {
- host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
+ host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry);
}
fh_fill_post_attrs(fhp);
@@ -2129,7 +2134,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
inode_lock_shared(inode);
- len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
+ len = vfs_getxattr(&nop_mnt_idmap, dentry, name, NULL, 0);
/*
* Zero-length attribute, just return.
@@ -2156,7 +2161,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
goto out;
}
- len = vfs_getxattr(&init_user_ns, dentry, name, buf, len);
+ len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len);
if (len <= 0) {
kvfree(buf);
buf = NULL;
@@ -2267,7 +2272,7 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
inode_lock(fhp->fh_dentry->d_inode);
fh_fill_pre_attrs(fhp);
- ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry,
+ ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
name, NULL);
fh_fill_post_attrs(fhp);
@@ -2294,7 +2299,7 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
inode_lock(fhp->fh_dentry->d_inode);
fh_fill_pre_attrs(fhp);
- ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf,
+ ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf,
len, flags, NULL);
fh_fill_post_attrs(fhp);
inode_unlock(fhp->fh_dentry->d_inode);
@@ -2378,14 +2383,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
- err = inode_permission(&init_user_ns, inode,
+ err = inode_permission(&nop_mnt_idmap, inode,
acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
(acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
- err = inode_permission(&init_user_ns, inode, MAY_EXEC);
+ err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC);
return err? nfserrno(err) : 0;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index dbdfef7ae85b..43fb57a301d3 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -170,9 +170,14 @@ static inline void fh_drop_write(struct svc_fh *fh)
static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
{
+ u32 request_mask = STATX_BASIC_STATS;
struct path p = {.mnt = fh->fh_export->ex_path.mnt,
.dentry = fh->fh_dentry};
- return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
+
+ if (fh->fh_maxsize == NFS4_FHSIZE)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+
+ return nfserrno(vfs_getattr(&p, stat, request_mask,
AT_STATX_SYNC_AS_STAT));
}
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4fd2cf6d1d2d..510978e602da 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -571,7 +571,7 @@ struct nfsd4_copy {
struct task_struct *copy_task;
refcount_t refcount;
- struct vfsmount *ss_mnt;
+ struct nfsd4_ssc_umount_item *ss_nsui;
struct nfs_fh c_fh;
nfs4_stateid stateid;
};