From 9494b2ce4b511e6d285afc0a5006b6cf5ea2b115 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2015 09:38:45 +0100
Subject: nfs: pass on count for CLONE operations

Currently we pass uninitialized stack garbage in the count parameter.
The value is usually large enought to clone whole files and thus let
simple tests pass, but it makes the tests for range clones very unhappy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42proc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 3e92a3cde15d..303d22ef4f37 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -284,6 +284,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
 		.dst_fh = NFS_FH(dst_inode),
 		.src_offset = src_offset,
 		.dst_offset = dst_offset,
+		.count = count,
 		.dst_bitmask = server->cache_consistency_bitmask,
 	};
 	struct nfs42_clone_res res = {
-- 
cgit v1.2.3-59-g8ed1b


From 3a2e176905b1a8d1b51cbfda719058f5ce893efd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2015 09:38:46 +0100
Subject: nfs: offer native ioctls even if CONFIG_COMPAT is set

Without this for example 64-bit binaries on typical amd64 distributions
would not be able to use ioctls on NFS.  For now this only affects clones.
Additionally ->compat_ioctl is defined even for non-compat builds, so
get rid of the pointless ifdef.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4file.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4aa571956cd6..e45f686a5a60 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -347,9 +347,6 @@ const struct file_operations nfs4_file_operations = {
 #endif /* CONFIG_NFS_V4_2 */
 	.check_flags	= nfs_check_flags,
 	.setlease	= simple_nosetlease,
-#ifdef CONFIG_COMPAT
 	.unlocked_ioctl = nfs4_ioctl,
-#else
 	.compat_ioctl	= nfs4_ioctl,
-#endif /* CONFIG_COMPAT */
 };
-- 
cgit v1.2.3-59-g8ed1b


From 21fad313d5890b674432fe3ad0c7bcf040320340 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2015 09:38:47 +0100
Subject: nfs: allow intra-file CLONE

Originally CLONE didn't allow for intra-file clones, but we recently
updated the spec to support this feature which is also supported by
local Linux file systems.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4file.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e45f686a5a60..61f1c1c02d06 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -203,6 +203,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
 	struct fd src_file;
 	struct inode *src_inode;
 	unsigned int bs = server->clone_blksize;
+	bool same_inode = false;
 	int ret;
 
 	/* dst file must be opened for writing */
@@ -221,10 +222,8 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
 
 	src_inode = file_inode(src_file.file);
 
-	/* src and dst must be different files */
-	ret = -EINVAL;
 	if (src_inode == dst_inode)
-		goto out_fput;
+		same_inode = true;
 
 	/* src file must be opened for reading */
 	if (!(src_file.file->f_mode & FMODE_READ))
@@ -249,8 +248,16 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
 			goto out_fput;
 	}
 
+	/* verify if ranges are overlapped within the same file */
+	if (same_inode) {
+		if (dst_off + count > src_off && dst_off < src_off + count)
+			goto out_fput;
+	}
+
 	/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
-	if (dst_inode < src_inode) {
+	if (same_inode) {
+		mutex_lock(&src_inode->i_mutex);
+	} else if (dst_inode < src_inode) {
 		mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
 		mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
 	} else {
@@ -275,7 +282,9 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
 		truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);
 
 out_unlock:
-	if (dst_inode < src_inode) {
+	if (same_inode) {
+		mutex_unlock(&src_inode->i_mutex);
+	} else if (dst_inode < src_inode) {
 		mutex_unlock(&src_inode->i_mutex);
 		mutex_unlock(&dst_inode->i_mutex);
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From 0f42a6a9b807b092841f7e1b381f8c7e80a0d86a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2015 09:38:48 +0100
Subject: nfs: use btrfs ioctl defintions for clone

The NFS CLONE_RANGE defintion was wrong and thus never worked.  Fix this
by simply using the btrfs ioctl defintion.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4file.c        | 10 ++++++----
 include/uapi/linux/nfs.h | 11 -----------
 2 files changed, 6 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 61f1c1c02d06..135353074c25 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -7,6 +7,7 @@
 #include <linux/file.h>
 #include <linux/falloc.h>
 #include <linux/nfs_fs.h>
+#include <uapi/linux/btrfs.h>	/* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */
 #include "delegation.h"
 #include "internal.h"
 #include "iostat.h"
@@ -300,12 +301,13 @@ out_drop_write:
 
 static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
 {
-	struct nfs_ioctl_clone_range_args args;
+	struct btrfs_ioctl_clone_range_args args;
 
 	if (copy_from_user(&args, argp, sizeof(args)))
 		return -EFAULT;
 
-	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count);
+	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
+				 args.dest_offset, args.src_length);
 }
 #else
 static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
@@ -325,9 +327,9 @@ long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
-	case NFS_IOC_CLONE:
+	case BTRFS_IOC_CLONE:
 		return nfs42_ioctl_clone(file, arg, 0, 0, 0);
-	case NFS_IOC_CLONE_RANGE:
+	case BTRFS_IOC_CLONE_RANGE:
 		return nfs42_ioctl_clone_range(file, argp);
 	}
 
diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
index 654bae3f1a38..5e6296160361 100644
--- a/include/uapi/linux/nfs.h
+++ b/include/uapi/linux/nfs.h
@@ -33,17 +33,6 @@
 
 #define NFS_PIPE_DIRNAME "nfs"
 
-/* NFS ioctls */
-/* Let's follow btrfs lead on CLONE to avoid messing userspace */
-#define NFS_IOC_CLONE		_IOW(0x94, 9, int)
-#define NFS_IOC_CLONE_RANGE	_IOW(0x94, 13, int)
-
-struct nfs_ioctl_clone_range_args {
-	__s64 src_fd;
-	__u64 src_off, count;
-	__u64 dst_off;
-};
-
 /*
  * NFS stats. The good thing with these values is that NFSv3 errors are
  * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which
-- 
cgit v1.2.3-59-g8ed1b


From 6b7153da2c1a58b0e809a8c50bcc3bab7374ce7b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2015 09:38:49 +0100
Subject: nfs: reduce the amount of ifdefs for v4.2 in nfs4file.c

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4file.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 135353074c25..db9b5fea5b3e 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -309,18 +309,6 @@ static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
 	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
 				 args.dest_offset, args.src_length);
 }
-#else
-static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
-		u64 src_off, u64 dst_off, u64 count)
-{
-	return -ENOTTY;
-}
-
-static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
-{
-	return -ENOTTY;
-}
-#endif /* CONFIG_NFS_V4_2 */
 
 long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -335,13 +323,9 @@ long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	return -ENOTTY;
 }
+#endif /* CONFIG_NFS_V4_2 */
 
 const struct file_operations nfs4_file_operations = {
-#ifdef CONFIG_NFS_V4_2
-	.llseek		= nfs4_file_llseek,
-#else
-	.llseek		= nfs_file_llseek,
-#endif
 	.read_iter	= nfs_file_read,
 	.write_iter	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
@@ -353,11 +337,14 @@ const struct file_operations nfs4_file_operations = {
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
-#ifdef CONFIG_NFS_V4_2
-	.fallocate	= nfs42_fallocate,
-#endif /* CONFIG_NFS_V4_2 */
 	.check_flags	= nfs_check_flags,
 	.setlease	= simple_nosetlease,
+#ifdef CONFIG_NFS_V4_2
+	.llseek		= nfs4_file_llseek,
+	.fallocate	= nfs42_fallocate,
 	.unlocked_ioctl = nfs4_ioctl,
 	.compat_ioctl	= nfs4_ioctl,
+#else
+	.llseek		= nfs_file_llseek,
+#endif
 };
-- 
cgit v1.2.3-59-g8ed1b


From 291e1b9459936c7e86951736d432e223b0710225 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Mon, 16 Nov 2015 14:51:07 -0500
Subject: NFS: Properly set NFS v4.2 NFSDBG_FACILITY

NFS v4.2 operations can work outside of pNFS, so dprintk() output
shouldn't be placed under NFSDBG_PNFS.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs42proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 303d22ef4f37..6b1ce9825430 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -14,7 +14,7 @@
 #include "pnfs.h"
 #include "internal.h"
 
-#define NFSDBG_FACILITY NFSDBG_PNFS
+#define NFSDBG_FACILITY NFSDBG_PROC
 
 static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
 				fmode_t fmode)
-- 
cgit v1.2.3-59-g8ed1b


From f54423a1f8fb0da4226a982618d2c703e413d4d6 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Wed, 18 Nov 2015 10:39:26 +0800
Subject: NFS4: Cleanup FATTR4_WORD0_FS_LOCATIONS after decoding success

Commit 1ca843a2d2 "nfs: Fix GETATTR bitmap verification" has check
the bitmap after decoding success, but decode_attr_fs_locations forgets
cleanup the FATTR4_WORD0_FS_LOCATIONS bits.

decode_getfattr_attrs always return -EIO when meeting FS_LOCATIONS now.

ls: cannot access /mnt/referal: Input/output error
ls: cannot access /mnt/replicas: Input/output error
total 32
drwxr-xr-x. 7 root root 8192 Nov 16 20:36 pnfs
??????????? ? ?    ?       ?            ? referal
??????????? ? ?    ?       ?            ? replicas

v2: clear the bit earlier

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4xdr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dfed4f5c8fcc..4e4441216804 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3615,6 +3615,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
 	status = 0;
 	if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
 		goto out;
+	bitmap[0] &= ~FATTR4_WORD0_FS_LOCATIONS;
 	status = -EIO;
 	/* Ignore borken servers that return unrequested attrs */
 	if (unlikely(res == NULL))
-- 
cgit v1.2.3-59-g8ed1b


From 91ab4b4d16e6649fbbf65f303c0c4e20ed680bd1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Thu, 19 Nov 2015 14:30:26 -0500
Subject: nfs: use sliding delay when LAYOUTGET gets NFS4ERR_DELAY

When LAYOUTGET gets NFS4ERR_DELAY, we currently will wait 15s before
retrying the call. That is a _very_ long time, so add a timeout value to
struct nfs4_layoutget and pass nfs4_async_handle_error a pointer to it.
This allows the RPC engine to use a sliding delay window, instead of a
15s delay.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 2 +-
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 765a03559363..89818036f035 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7866,7 +7866,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 			spin_unlock(&inode->i_lock);
 		goto out_restart;
 	}
-	if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+	if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN)
 		goto out_restart;
 out:
 	dprintk("<-- %s\n", __func__);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 570d630f98ae..11bbae44f4cb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,6 +251,7 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
 	gfp_t gfp_flags;
+	long timeout;
 };
 
 struct nfs4_getdeviceinfo_args {
-- 
cgit v1.2.3-59-g8ed1b


From c68a027c05709330fe5b2f50c50d5fa02124b5d8 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 20 Nov 2015 09:56:20 -0500
Subject: nfs4: start callback_ident at idr 1

If clp->cl_cb_ident is zero, then nfs_cb_idr_remove_locked() skips removing
it when the nfs_client is freed.  A decoding or server bug can then find
and try to put that first nfs_client which would lead to a crash.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Fixes: d6870312659d ("nfs4client: convert to idr_alloc()")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 223bedda64ae..10410e8b5853 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
 		return ret;
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->nfs_client_lock);
-	ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+	ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
 	if (ret >= 0)
 		clp->cl_cb_ident = ret;
 	spin_unlock(&nn->nfs_client_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 38b7631fbe42e6e247e9fc9879f961b14a687e3b Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 20 Nov 2015 09:55:30 -0500
Subject: nfs4: limit callback decoding to received bytes

A truncated cb_compound request will cause the client to decode null or
data from a previous callback for nfs4.1 backchannel case, or uninitialized
data for the nfs4.0 case. This is because the path through
svc_process_common() advances the request's iov_base and decrements iov_len
without adjusting the overall xdr_buf's len field.  That causes
xdr_init_decode() to set up the xdr_stream with an incorrect length in
nfs4_callback_compound().

Fixing this for the nfs4.1 backchannel case first requires setting the
correct iov_len and page_len based on the length of received data in the
same manner as the nfs4.0 case.

Then the request's xdr_buf length can be adjusted for both cases based upon
the remaining iov_len and page_len.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/callback_xdr.c         | 7 +++++--
 net/sunrpc/backchannel_rqst.c | 8 ++++++++
 net/sunrpc/svc.c              | 1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac73488..beac58b0e09c 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -78,7 +78,8 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
 
 	p = xdr_inline_decode(xdr, nbytes);
 	if (unlikely(p == NULL))
-		printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
+		printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed "
+							"or truncated request.\n");
 	return p;
 }
 
@@ -889,6 +890,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct xdr_stream xdr_in, xdr_out;
+	struct xdr_buf *rq_arg = &rqstp->rq_arg;
 	__be32 *p, status;
 	struct cb_process_state cps = {
 		.drc_status = 0,
@@ -900,7 +902,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	dprintk("%s: start\n", __func__);
 
-	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+	rq_arg->len = rq_arg->head[0].iov_len + rq_arg->page_len;
+	xdr_init_decode(&xdr_in, rq_arg, rq_arg->head[0].iov_base);
 
 	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 229956bf8457..95f82d8d4888 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -353,12 +353,20 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct svc_serv *bc_serv = xprt->bc_serv;
+	struct xdr_buf *rq_rcv_buf = &req->rq_rcv_buf;
 
 	spin_lock(&xprt->bc_pa_lock);
 	list_del(&req->rq_bc_pa_list);
 	xprt_dec_alloc_count(xprt, 1);
 	spin_unlock(&xprt->bc_pa_lock);
 
+	if (copied <= rq_rcv_buf->head[0].iov_len) {
+		rq_rcv_buf->head[0].iov_len = copied;
+		rq_rcv_buf->page_len = 0;
+	} else {
+		rq_rcv_buf->page_len = copied - rq_rcv_buf->head[0].iov_len;
+	}
+
 	req->rq_private_buf.len = copied;
 	set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc5b7b5032ca..7fccf9675df8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1363,6 +1363,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
 	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
 	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+	rqstp->rq_arg.len = req->rq_private_buf.len;
 
 	/* reset result send buffer "put" position */
 	resv->iov_len = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 616c319683e34d10952b74a73ee12e080531b476 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Wed, 25 Nov 2015 13:50:45 -0500
Subject: nfs: ensure that attrcache is revalidated after a SETATTR

If we get no post-op attributes back from a SETATTR operation, then no
attributes will of course be updated during the call to
nfs_update_inode.

We know however that the attributes are invalid at that point, since we
just changed some of them. At the very least, the ctime will be bogus.
If we get no post-op attributes back on the call, mark the attrcache
invalid to reflect that fact.

Reviewed-by: Steve French <steve.french@primarydata.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e10d833..eda3da3f3a7b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -618,7 +618,10 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
-	nfs_update_inode(inode, fattr);
+	if (fattr->valid)
+		nfs_update_inode(inode, fattr);
+	else
+		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
-- 
cgit v1.2.3-59-g8ed1b


From c812012f9ca7cf89c9e1a1cd512e6c3b5be04b85 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Wed, 25 Nov 2015 13:50:11 -0500
Subject: nfs: if we have no valid attrs, then don't declare the attribute
 cache valid

If we pass in an empty nfs_fattr struct to nfs_update_inode, it will
(correctly) not update any of the attributes, but it then clears the
NFS_INO_INVALID_ATTR flag, which indicates that the attributes are
up to date. Don't clear the flag if the fattr struct has no valid
attrs to apply.

Reviewed-by: Steve French <steve.french@primarydata.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index eda3da3f3a7b..31b0a52223a7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1827,7 +1827,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
 			nfsi->attr_gencount = fattr->gencount;
 	}
-	invalid &= ~NFS_INO_INVALID_ATTR;
+
+	/* Don't declare attrcache up to date if there were no attrs! */
+	if (fattr->valid != 0)
+		invalid &= ~NFS_INO_INVALID_ATTR;
+
 	/* Don't invalidate the data if we were to blame */
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
 				|| S_ISLNK(inode->i_mode)))
-- 
cgit v1.2.3-59-g8ed1b


From 4f2e9dce0c6348a95eaa56ade9bab18572221088 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Wed, 25 Nov 2015 13:43:14 -0500
Subject: nfs4: resend LAYOUTGET when there is a race that changes the seqid

pnfs_layout_process will check the returned layout stateid against what
the kernel has in-core. If it turns out that the stateid we received is
older, then we should resend the LAYOUTGET instead of falling back to
MDS I/O.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Cc: stable@vger.kernel.org # 3.18+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 56 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 93496c059837..5a8ae2125b50 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
 	dprintk("--> %s\n", __func__);
 
-	lgp = kzalloc(sizeof(*lgp), gfp_flags);
-	if (lgp == NULL)
-		return NULL;
+	/*
+	 * Synchronously retrieve layout information from server and
+	 * store in lseg. If we race with a concurrent seqid morphing
+	 * op, then re-send the LAYOUTGET.
+	 */
+	do {
+		lgp = kzalloc(sizeof(*lgp), gfp_flags);
+		if (lgp == NULL)
+			return NULL;
+
+		i_size = i_size_read(ino);
+
+		lgp->args.minlength = PAGE_CACHE_SIZE;
+		if (lgp->args.minlength > range->length)
+			lgp->args.minlength = range->length;
+		if (range->iomode == IOMODE_READ) {
+			if (range->offset >= i_size)
+				lgp->args.minlength = 0;
+			else if (i_size - range->offset < lgp->args.minlength)
+				lgp->args.minlength = i_size - range->offset;
+		}
+		lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+		lgp->args.range = *range;
+		lgp->args.type = server->pnfs_curr_ld->id;
+		lgp->args.inode = ino;
+		lgp->args.ctx = get_nfs_open_context(ctx);
+		lgp->gfp_flags = gfp_flags;
+		lgp->cred = lo->plh_lc_cred;
 
-	i_size = i_size_read(ino);
+		lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+	} while (lseg == ERR_PTR(-EAGAIN));
 
-	lgp->args.minlength = PAGE_CACHE_SIZE;
-	if (lgp->args.minlength > range->length)
-		lgp->args.minlength = range->length;
-	if (range->iomode == IOMODE_READ) {
-		if (range->offset >= i_size)
-			lgp->args.minlength = 0;
-		else if (i_size - range->offset < lgp->args.minlength)
-			lgp->args.minlength = i_size - range->offset;
-	}
-	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
-	lgp->args.range = *range;
-	lgp->args.type = server->pnfs_curr_ld->id;
-	lgp->args.inode = ino;
-	lgp->args.ctx = get_nfs_open_context(ctx);
-	lgp->gfp_flags = gfp_flags;
-	lgp->cred = lo->plh_lc_cred;
-
-	/* Synchronously retrieve layout information from server and
-	 * store in lseg.
-	 */
-	lseg = nfs4_proc_layoutget(lgp, gfp_flags);
 	if (IS_ERR(lseg)) {
 		switch (PTR_ERR(lseg)) {
 		case -ENOMEM:
@@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 		/* existing state ID, make sure the sequence number matches. */
 		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
 			dprintk("%s forget reply due to sequence\n", __func__);
+			status = -EAGAIN;
 			goto out_forget_reply;
 		}
 		pnfs_set_layout_stateid(lo, &res->stateid, false);
-- 
cgit v1.2.3-59-g8ed1b