From 140e049c64ce848392adbf4678983ecc76888dde Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 17:42:42 -0500 Subject: NFS: Add a helper to set attribute barriers Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 83107be3dd01..b0cbc1ba82da 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1260,6 +1260,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr) } EXPORT_SYMBOL_GPL(nfs_fattr_init); +/** + * nfs_fattr_set_barrier + * @fattr: attributes + * + * Used to set a barrier after an attribute was updated. This + * barrier ensures that older attributes from RPC calls that may + * have raced with our update cannot clobber these new values. + * Note that you are still responsible for ensuring that other + * operations which change the attribute on the server do not + * collide. + */ +void nfs_fattr_set_barrier(struct nfs_fattr *fattr) +{ + fattr->gencount = nfs_inc_attr_generation_counter(); +} + struct nfs_fattr *nfs_alloc_fattr(void) { struct nfs_fattr *fattr; -- cgit v1.2.3-59-g8ed1b From f044636d972246d451e06226cc1675d5da389762 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 16:09:04 -0500 Subject: NFS: Add attribute update barriers to nfs_setattr_update_inode() Ensure that other operations which raced with our setattr RPC call cannot revert the file attribute changes that were made on the server. To do so, we artificially bump the attribute generation counter on the inode so that all calls to nfs_fattr_init() that precede ours will be dropped. The motivation for the patch came from Chuck Lever's reports of readaheads racing with truncate operations and causing the file size to be reverted. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 17 ++++++++++++----- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/proc.c | 2 +- include/linux/nfs_fs.h | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b0cbc1ba82da..3a2d127de499 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr); * This is a copy of the common vmtruncate, but with the locking * corrected to take into account the fact that NFS requires * inode->i_size to be updated under the inode->i_lock. + * Note: must be called with inode->i_lock held! */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { @@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) if (err) goto out; - spin_lock(&inode->i_lock); i_size_write(inode, offset); /* Optimisation */ if (offset == 0) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + spin_lock(&inode->i_lock); out: return err; } @@ -585,10 +586,15 @@ out: * Note: we do this in the *proc.c in order to ensure that * it works for things like exclusive creates too. */ -void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) +void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, + struct nfs_fattr *fattr) { + /* Barrier: bump the attribute generation count. */ + nfs_fattr_set_barrier(fattr); + + spin_lock(&inode->i_lock); + NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { - spin_lock(&inode->i_lock); if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_gid = attr->ia_gid; nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL); - spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } + nfs_update_inode(inode, fattr); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 78e557c3ab87..11109a137c0c 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); dprintk("NFS reply setattr: %d\n", status); return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4e41340e957d..c499e02a58ca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2416,8 +2416,8 @@ static int _nfs4_do_open(struct inode *dir, opendata->o_res.f_attr, sattr, state, label, olabel); if (status == 0) { - nfs_setattr_update_inode(state->inode, sattr); - nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); + nfs_setattr_update_inode(state->inode, sattr, + opendata->o_res.f_attr); nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } } @@ -3291,7 +3291,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); if (status == 0) { - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); nfs_setsecurity(inode, fattr, label); } nfs4_label_free(label); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b09cc23d6f43..6202bc0f11bb 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); dprintk("NFS reply setattr: %d\n", status); return status; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3a4ffb5856cd..f26e64e0aff8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -356,7 +356,7 @@ extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *ino extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); -extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); +extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); -- cgit v1.2.3-59-g8ed1b From f5062003465c20cfe584d9129a463322ad5cf4ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 19:34:32 -0500 Subject: NFS: Set an attribute barrier on all updates Ensure that we update the attribute barrier even if there were no invalidations, provided that this value is newer than the old one. Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3a2d127de499..299bf7171a4d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1738,6 +1738,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; + /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { @@ -1745,6 +1746,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; } + /* Set the barrier to be more recent than this fattr */ + if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) + nfsi->attr_gencount = fattr->gencount; } invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ -- cgit v1.2.3-59-g8ed1b From a08a8cd375db9769588257e7782f6b6b68561b88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 17:36:09 -0500 Subject: NFS: Add attribute update barriers to NFS writebacks Ensure that other operations that race with our write RPC calls cannot revert the file size updates that were made on the server. Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 25 ++++++++++++++++++++++--- fs/nfs/internal.h | 1 + fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 2 +- fs/nfs/proc.c | 4 +--- fs/nfs/write.c | 30 ++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 1 + 7 files changed, 57 insertions(+), 8 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 299bf7171a4d..ff9a6795da46 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1491,7 +1491,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** - * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes * @@ -1501,11 +1501,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); * * This function is mainly designed to be used by the ->write_done() functions. */ -int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr) { int status; - spin_lock(&inode->i_lock); /* Don't do a WCC update if these attributes are already stale */ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !nfs_inode_attrs_need_update(inode, fattr)) { @@ -1537,6 +1536,26 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa } out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr); + return status; +} + +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + spin_lock(&inode->i_lock); + status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b802fb3a2d99..9e6475bc5ba2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo, u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); +void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 11109a137c0c..1f11d2533ee4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); + nfs_writeback_update_inode(hdr); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c499e02a58ca..b022e64b76a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4237,7 +4237,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), hdr->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); + nfs_writeback_update_inode(hdr); } return 0; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 6202bc0f11bb..c63189acd052 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = hdr->inode; - if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); + nfs_writeback_update_inode(hdr); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 595d81e354d1..849ed784d6ac 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode) return 0; } +static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, + struct nfs_fattr *fattr) +{ + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; + + if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + return; + if (argp->offset + resp->count != fattr->size) + return; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + return; + /* Set attribute barrier */ + nfs_fattr_set_barrier(fattr); +} + +void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) +{ + struct nfs_fattr *fattr = hdr->res.fattr; + struct inode *inode = hdr->inode; + + if (fattr == NULL) + return; + spin_lock(&inode->i_lock); + nfs_writeback_check_extend(hdr, fattr); + nfs_post_op_update_inode_force_wcc_locked(inode, fattr); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_writeback_update_inode); + /* * This function is called when the WRITE call is complete. */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f26e64e0aff8..59b1516b9fd4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -343,6 +343,7 @@ extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); +extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); -- cgit v1.2.3-59-g8ed1b From 8f8ba1d739b7047e2e1d91735716af2799ff2b1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 17:54:58 -0500 Subject: NFSv4: Add attribute update barriers to delegreturn and pNFS layoutcommit Ensure that other operations that race with delegreturn and layoutcommit cannot revert the attribute updates that were made on the server. Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ff9a6795da46..cd094d652199 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1555,6 +1555,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa int status; spin_lock(&inode->i_lock); + nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; -- cgit v1.2.3-59-g8ed1b From 00fb4c9f8421c9aac3947d36ffe8e049b95f7ab1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 17:57:14 -0500 Subject: NFS: Remove size hack in nfs_inode_attrs_need_update() Prior to this patch, we used to always OK attribute updates that extended the file size on the assumption that we might be performing writeback. Now that we have attribute barriers to protect the writeback related updates, we should remove this hack, as it can cause truncate() operations to apparently be reverted if/when a readahead or getattr RPC call races with our on-the-wire SETATTR. Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index cd094d652199..fef65d1e024e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1238,13 +1238,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; } -static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) - return 0; - return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); -} - static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) @@ -1393,7 +1386,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || nfs_ctime_need_update(inode, fattr) || - nfs_size_need_update(inode, fattr) || ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } -- cgit v1.2.3-59-g8ed1b From 92d64e47b67b5e7fe1b5358402ab222a32ec3479 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 19:48:26 -0500 Subject: NFS: Fix nfs_post_op_update_inode() to set an attribute barrier nfs_post_op_update_inode() is called after a self-induced attribute update. Ensure that it also sets the barrier. Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fef65d1e024e..c66c1df467f4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1475,6 +1475,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) int status; spin_lock(&inode->i_lock); + nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); -- cgit v1.2.3-59-g8ed1b From 3235b40303b6f609c446275d0e7f6f9f4fe94156 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Feb 2015 19:52:06 -0500 Subject: NFSv4: Set a barrier in the update_changeattr() helper Ensure that we don't regress the changes that were made to the directory. Signed-off-by: Trond Myklebust Tested-by: Chuck Lever --- fs/nfs/inode.c | 1 + fs/nfs/nfs4proc.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c66c1df467f4..5026c44a98e1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1249,6 +1249,7 @@ unsigned long nfs_inc_attr_generation_counter(void) { return atomic_long_inc_return(&nfs_attr_generation_counter); } +EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter); void nfs_fattr_init(struct nfs_fattr *fattr) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b022e64b76a5..a211daf58c32 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } -- cgit v1.2.3-59-g8ed1b From 874f946376de57c8d6230b30ad71f742883fee3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Mar 2015 23:32:08 -0500 Subject: NFS: Fix a regression in the read() syscall When invalidating the page cache for a regular file, we want to first sync all dirty data to disk and then call invalidate_inode_pages2(). The latter relies on nfs_launder_page() and nfs_release_page() to deal respectively with dirty pages, and unstable written pages. When commit 9590544694bec ("NFS: avoid deadlocks with loop-back mounted NFS filesystems.") changed the behaviour of nfs_release_page(), then it made it possible for invalidate_inode_pages2() to fail with an EBUSY. Unfortunately, that error is then propagated back to read(). Let's therefore work around the problem for now by protecting the call to sync the data and invalidate_inode_pages2() so that they are atomic w.r.t. the addition of new writes. Later on, we can revisit whether or not we still need nfs_launder_page() and nfs_release_page(). Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 ++-- fs/nfs/inode.c | 37 ++++++++++++++++++++++++++++++++++--- include/linux/nfs_fs.h | 1 + 3 files changed, 37 insertions(+), 5 deletions(-) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c045c7169fa0..41963ffca597 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) iocb->ki_filp, iov_iter_count(to), (unsigned long) iocb->ki_pos); - result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_read_iter(iocb, to); if (result > 0) @@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", filp, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_mapping(inode, filp->f_mapping); + res = nfs_revalidate_mapping_protected(inode, filp->f_mapping); if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); if (res > 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5026c44a98e1..8edb7d049565 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1067,11 +1067,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode) } /** - * nfs_revalidate_mapping - Revalidate the pagecache + * __nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping + * @may_lock - take inode->i_mutex? */ -int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +static int __nfs_revalidate_mapping(struct inode *inode, + struct address_space *mapping, + bool may_lock) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; @@ -1120,7 +1123,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); - ret = nfs_invalidate_mapping(inode, mapping); + if (may_lock) { + mutex_lock(&inode->i_mutex); + ret = nfs_invalidate_mapping(inode, mapping); + mutex_unlock(&inode->i_mutex); + } else + ret = nfs_invalidate_mapping(inode, mapping); trace_nfs_invalidate_mapping_exit(inode, ret); clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); @@ -1130,6 +1138,29 @@ out: return ret; } +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + return __nfs_revalidate_mapping(inode, mapping, false); +} + +/** + * nfs_revalidate_mapping_protected - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + * + * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex + * while invalidating the mapping. + */ +int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping) +{ + return __nfs_revalidate_mapping(inode, mapping, true); +} + static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 59b1516b9fd4..b01ccf371fdc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -356,6 +356,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); +extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, -- cgit v1.2.3-59-g8ed1b From ef070dcb3989f553f5d84edf555eebc7e204099d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Mar 2015 00:06:35 -0500 Subject: NFS: Don't write enable new pages while an invalidation is proceeding nfs_vm_page_mkwrite() should wait until the page cache invalidation is finished. This is the second patch in a 2 patch series to deprecate the NFS client's reliance on nfs_release_page() in the context of nfs_invalidate_mapping(). Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 3 +++ fs/nfs/inode.c | 1 + 2 files changed, 4 insertions(+) (limited to 'fs/nfs/inode.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 41963ffca597..e679d24c39d3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -623,6 +623,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* make sure the cache has finished storing the page */ nfs_fscache_wait_on_page_write(NFS_I(inode), page); + wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); + lock_page(page); mapping = page_file_mapping(page); if (mapping != inode->i_mapping) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8edb7d049565..d42dff6d5e98 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1035,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (mapping->nrpages != 0) { if (S_ISREG(inode->i_mode)) { + unmap_mapping_range(mapping, 0, 0, 0); ret = nfs_sync_mapping(mapping); if (ret < 0) return ret; -- cgit v1.2.3-59-g8ed1b