From ddeff520f02b92128132c282c350fa72afffb84a Mon Sep 17 00:00:00 2001
From: Nikita Danilov <nikita@clusterfs.com>
Date: Wed, 9 Aug 2006 13:53:47 -0400
Subject: NFS: Fix a potential deadlock in nfs_release_page

nfs_wb_page() waits on request completion and, as a result, is not safe to be
called from nfs_release_page() invoked by VM scanner as part of GFP_NOFS
allocation. Fix possible deadlock by analyzing gfp mask and refusing to
release page if __GFP_FS is not set.

Signed-off-by: Nikita Danilov <danilov@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 374d969debfb290bafcb41d28918dc6f7e43ce31 commit)
---
 fs/nfs/file.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc2b874ad5a4..48e892880d5b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	return !nfs_wb_page(page->mapping->host, page);
+	if (gfp & __GFP_FS)
+		return !nfs_wb_page(page->mapping->host, page);
+	else
+		/*
+		 * Avoid deadlock on nfs_wait_on_request().
+		 */
+		return 0;
 }
 
 const struct address_space_operations nfs_file_aops = {
-- 
cgit v1.2.3-59-g8ed1b


From a634904a7de0d3a0bc606f608007a34e8c05bfee Mon Sep 17 00:00:00 2001
From: ASANO Masahiro <masano@tnes.nec.co.jp>
Date: Tue, 22 Aug 2006 20:06:02 -0400
Subject: VFS: add lookup hint for network file systems

I'm trying to speeding up mkdir(2) for network file systems.  A typical
mkdir(2) calls two inode_operations: lookup and mkdir.  The lookup
operation would fail with ENOENT in common case.  I think it is unnecessary
because the subsequent mkdir operation can check it.  In case of creat(2),
lookup operation is called with the LOOKUP_CREATE flag, so individual
filesystem can omit real lookup.  e.g.  nfs_lookup().

Here is a sample patch which uses LOOKUP_CREATE and O_EXCL on mkdir,
symlink and mknod.  This uses the gadget for creat(2).

And here is the result of a benchmark on NFSv3.
  mkdir(2) 10,000 times:
    original  50.5 sec
    patched   29.0 sec

Signed-off-by: ASANO Masahiro <masano@tnes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from fab7bf44449b29f9d5572a5dd8adcf7c91d5bf0f commit)
---
 fs/namei.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 55a131230f94..863166441bf3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1767,6 +1767,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+	nd->flags |= LOOKUP_CREATE;
+	nd->intent.open.flags = O_EXCL;
 
 	/*
 	 * Do the final lookup.
-- 
cgit v1.2.3-59-g8ed1b


From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Jul 2006 14:11:48 -0700
Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path

Signe-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit)
---
 fs/nfs/idmap.c                     |  3 +--
 include/linux/sunrpc/rpc_pipe_fs.h |  2 +-
 net/sunrpc/auth_gss/auth_gss.c     |  2 +-
 net/sunrpc/rpc_pipe.c              | 20 ++++++--------------
 4 files changed, 9 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b81e7ed3c902..df0be1214358 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -130,9 +130,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
+	rpc_unlink(idmap->idmap_dentry);
 	dput(idmap->idmap_dentry);
-	idmap->idmap_dentry = NULL;
-	rpc_unlink(idmap->idmap_path);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 2c2189cb30aa..04d2767d5ef7 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -44,7 +44,7 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
 extern int rpc_rmdir(char *);
 extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
-extern int rpc_unlink(char *);
+extern int rpc_unlink(struct dentry *);
 extern struct vfsmount *rpc_get_mount(void);
 extern void rpc_put_mount(void);
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4a9aa9393b97..beaa7b848246 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -718,7 +718,7 @@ gss_destroy(struct rpc_auth *auth)
 		auth, auth->au_flavor);
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	rpc_unlink(gss_auth->path);
+	rpc_unlink(gss_auth->dentry);
 	dput(gss_auth->dentry);
 	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a3bd2db2e024..9144f2767b66 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -746,22 +746,15 @@ err_dput:
 }
 
 int
-rpc_unlink(char *path)
+rpc_unlink(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
+	struct dentry *parent;
 	struct inode *dir;
-	int error;
+	int error = 0;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
 	d_drop(dentry);
 	if (dentry->d_inode) {
 		rpc_close_pipes(dentry->d_inode);
@@ -769,9 +762,8 @@ rpc_unlink(char *path)
 	}
 	dput(dentry);
 	inode_dir_notify(dir, DN_DELETE);
-out_release:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
+	dput(parent);
 	return error;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 14 Aug 2006 13:11:15 -0400
Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs

rpc_unlink() and rpc_rmdir() will dput the dentry reference for you.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit)
---
 fs/nfs/idmap.c                 |  1 -
 net/sunrpc/auth_gss/auth_gss.c |  1 -
 net/sunrpc/clnt.c              | 15 ++++++---------
 3 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index df0be1214358..07a5dd57646e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -131,7 +131,6 @@ nfs_idmap_delete(struct nfs4_client *clp)
 	if (!idmap)
 		return;
 	rpc_unlink(idmap->idmap_dentry);
-	dput(idmap->idmap_dentry);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index beaa7b848246..ef1cf5b476c8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -719,7 +719,6 @@ gss_destroy(struct rpc_auth *auth)
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
 	rpc_unlink(gss_auth->dentry);
-	dput(gss_auth->dentry);
 	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d307556872db..d9eac7069101 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -184,7 +184,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 out_no_auth:
 	if (!IS_ERR(clnt->cl_dentry)) {
 		rpc_rmdir(clnt->cl_dentry);
-		dput(clnt->cl_dentry);
 		rpc_put_mount();
 	}
 out_no_path:
@@ -251,10 +250,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
 	new->cl_dead = 0;
-	if (!IS_ERR(new->cl_dentry)) {
+	if (!IS_ERR(new->cl_dentry))
 		dget(new->cl_dentry);
-		rpc_get_mount();
-	}
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
@@ -317,11 +314,15 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		clnt->cl_auth = NULL;
 	}
 	if (clnt->cl_parent != clnt) {
+		if (!IS_ERR(clnt->cl_dentry))
+			dput(clnt->cl_dentry);
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (!IS_ERR(clnt->cl_dentry))
+	if (!IS_ERR(clnt->cl_dentry)) {
 		rpc_rmdir(clnt->cl_dentry);
+		rpc_put_mount();
+	}
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
@@ -331,10 +332,6 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 out_free:
 	rpc_free_iostats(clnt->cl_metrics);
 	clnt->cl_metrics = NULL;
-	if (!IS_ERR(clnt->cl_dentry)) {
-		dput(clnt->cl_dentry);
-		rpc_put_mount();
-	}
 	kfree(clnt);
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 01df9c5e918ae5559f2d96da0143f8bfbb9e6171 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 10 Aug 2006 11:58:57 -0400
Subject: LOCKD: Fix a deadlock in nlm_traverse_files()

nlm_traverse_files() is not allowed to hold the nlm_file_mutex while calling
nlm_inspect file, since it may end up calling nlm_release_file() when
releaseing the blocks.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from e558d3cde986e04f68afe8c790ad68ef4b94587a commit)
---
 fs/lockd/svcsubs.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2a4df9b3779a..01b4db9e5466 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -237,19 +237,22 @@ static int
 nlm_traverse_files(struct nlm_host *host, int action)
 {
 	struct nlm_file	*file, **fp;
-	int		i;
+	int i, ret = 0;
 
 	mutex_lock(&nlm_file_mutex);
 	for (i = 0; i < FILE_NRHASH; i++) {
 		fp = nlm_files + i;
 		while ((file = *fp) != NULL) {
+			file->f_count++;
+			mutex_unlock(&nlm_file_mutex);
+
 			/* Traverse locks, blocks and shares of this file
 			 * and update file->f_locks count */
-			if (nlm_inspect_file(host, file, action)) {
-				mutex_unlock(&nlm_file_mutex);
-				return 1;
-			}
+			if (nlm_inspect_file(host, file, action))
+				ret = 1;
 
+			mutex_lock(&nlm_file_mutex);
+			file->f_count--;
 			/* No more references to this file. Let go of it. */
 			if (!file->f_blocks && !file->f_locks
 			 && !file->f_shares && !file->f_count) {
@@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action)
 		}
 	}
 	mutex_unlock(&nlm_file_mutex);
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 79558f3610efd7928e8882b2eaca3093b283630e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 13:44:32 -0400
Subject: NFS: Fix issue with EIO on NFS read

The problem is that we may be caching writes that would extend the file and
create a hole in the region that we are reading. In this case, we need to
detect the eof from the server, ensure that we zero out the pages that
are part of the hole and mark them as up to date.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 856b603b01b99146918c093969b6cb1b1b0f1c01 commit)
---
 fs/nfs/read.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 65c0c5b32351..da9cf11c326f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
 	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
 	base &= ~PAGE_CACHE_MASK;
 	pglen = PAGE_CACHE_SIZE - base;
-	if (pglen < remainder)
+	for (;;) {
+		if (remainder <= pglen) {
+			memclear_highpage_flush(*pages, base, remainder);
+			break;
+		}
 		memclear_highpage_flush(*pages, base, pglen);
-	else
-		memclear_highpage_flush(*pages, base, remainder);
+		pages++;
+		remainder -= pglen;
+		pglen = PAGE_CACHE_SIZE;
+		base = 0;
+	}
 }
 
 /*
@@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	unsigned int base = data->args.pgbase;
 	struct page **pages;
 
+	if (data->res.eof)
+		count = data->args.count;
 	if (unlikely(count == 0))
 		return;
 	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
@@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageUptodate(*pages);
-	/*
-	 * Was this an eof or a short read? If the latter, don't mark the page
-	 * as uptodate yet.
-	 */
-	if (count > 0 && (data->res.eof || data->args.count == data->res.count))
+	if (count != 0)
 		SetPageUptodate(*pages);
 }
 
@@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
 	count += base;
 	for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
 		SetPageError(*pages);
+	if (count != 0)
+		SetPageError(*pages);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 24 Aug 2006 15:44:19 -0400
Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode

Check the bounds of length specifiers more thoroughly in the XDR decoding of
NFS4 readdir reply data.

Currently, if the server returns a bitmap or attr length that causes the
current decode point pointer to wrap, this could go undetected (consider a
small "negative" length on a 32-bit machine).

Also add a check into the main XDR decode handler to make sure that the amount
of data is a multiple of four bytes (as specified by RFC-1014).  This makes
sure that we can do u32* pointer subtraction in the NFS client without risking
an undefined result (the result is undefined if the pointers are not correctly
aligned with respect to one another).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit)
---
 fs/nfs/nfs4xdr.c  | 21 +++++++++++----------
 net/sunrpc/clnt.c | 11 +++++++++++
 2 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1750d996f49f..730ec8fb31c6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	struct kvec	*iov = rcvbuf->head;
 	unsigned int	nr, pglen = rcvbuf->page_len;
 	uint32_t	*end, *entry, *p, *kaddr;
-	uint32_t	len, attrlen;
+	uint32_t	len, attrlen, xlen;
 	int 		hdrlen, recvd, status;
 
 	status = decode_op_hdr(xdr, OP_READDIR);
@@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 
 	BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
 	kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
-	end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+	end = p + ((pglen + readdir->pgbase) >> 2);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
-		if (p + 3 > end)
+		if (end - p < 3)
 			goto short_pkt;
 		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
 		p += 2;			/* cookie */
@@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 			printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
 			goto err_unmap;
 		}
-		dprintk("filename = %*s\n", len, (char *)p);
-		p += XDR_QUADLEN(len);
-		if (p + 1 > end)
+		xlen = XDR_QUADLEN(len);
+		if (end - p < xlen + 1)
 			goto short_pkt;
+		dprintk("filename = %*s\n", len, (char *)p);
+		p += xlen;
 		len = ntohl(*p++);	/* bitmap length */
-		p += len;
-		if (p + 1 > end)
+		if (end - p < len + 1)
 			goto short_pkt;
+		p += len;
 		attrlen = XDR_QUADLEN(ntohl(*p++));
-		p += attrlen;		/* attributes */
-		if (p + 2 > end)
+		if (end - p < attrlen + 2)
 			goto short_pkt;
+		p += attrlen;		/* attributes */
 		entry = p;
 	}
 	if (!nr && (entry[0] != 0 || entry[1] == 0))
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d9eac7069101..3e19d321067a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1181,6 +1181,17 @@ call_verify(struct rpc_task *task)
 	u32	*p = iov->iov_base, n;
 	int error = -EACCES;
 
+	if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
+		/* RFC-1014 says that the representation of XDR data must be a
+		 * multiple of four bytes
+		 * - if it isn't pointer subtraction in the NFS client may give
+		 *   undefined results
+		 */
+		printk(KERN_WARNING
+		       "call_verify: XDR representation not a multiple of"
+		       " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len);
+		goto out_eio;
+	}
 	if ((len -= 3) < 0)
 		goto out_overflow;
 	p += 1;	/* skip XID */
-- 
cgit v1.2.3-59-g8ed1b


From 16b4289c7460ba9c04af40c574949dcca9029658 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Aug 2006 12:27:15 -0400
Subject: NFSv4: Add v4 exception handling for the ACL functions.

This is needed in order to handle any NFS4ERR_DELAY errors that might be
returned by the server. It also ensures that we map the NFSv4 errors before
they are returned to userland.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 71c12b3f0abc7501f6ed231a6d17bc9c05a238dc commit)
---
 fs/nfs/nfs4proc.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e6ee97f19d81..153898e1331f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2668,7 +2668,7 @@ out:
 	nfs4_set_cached_acl(inode, acl);
 }
 
-static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
 	struct page *pages[NFS4ACL_MAXPAGES];
 	struct nfs_getaclargs args = {
@@ -2721,6 +2721,19 @@ out_free:
 	return ret;
 }
 
+static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	ssize_t ret;
+	do {
+		ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+		if (ret >= 0)
+			break;
+		ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
+	} while (exception.retry);
+	return ret;
+}
+
 static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	return nfs4_get_acl_uncached(inode, buf, buflen);
 }
 
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct page *pages[NFS4ACL_MAXPAGES];
@@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 	return ret;
 }
 
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(inode),
+				__nfs4_proc_set_acl(inode, buf, buflen),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-- 
cgit v1.2.3-59-g8ed1b


From a343bb7750e6a098909c34f5c5dfddbc4fa40053 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:03 -0400
Subject: VFS: Fix access("file", X_OK) in the presence of ACLs

Currently, the access() call will return incorrect information on NFS if
there exists an ACL that grants execute access to the user on a regular
file. The reason the information is incorrect is that the VFS overrides
this execute access in open_exec() by checking (inode->i_mode & 0111).

This patch propagates the VFS execute bit check back into the generic
permission() call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 64cbae98848c4c99851cb0a405f0b4982cd76c1e commit)
---
 fs/namei.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 863166441bf3..432d6bc6fab0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask,
 
 int permission(struct inode *inode, int mask, struct nameidata *nd)
 {
+	umode_t mode = inode->i_mode;
 	int retval, submask;
 
 	if (mask & MAY_WRITE) {
-		umode_t mode = inode->i_mode;
 
 		/*
 		 * Nobody gets write access to a read-only fs.
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 
+	/*
+	 * MAY_EXEC on regular files requires special handling: We override
+	 * filesystem execute permissions if the mode bits aren't set.
+	 */
+	if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+		return -EACCES;
+
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	submask = mask & ~MAY_APPEND;
 	if (inode->i_op && inode->i_op->permission)
-- 
cgit v1.2.3-59-g8ed1b


From 9167b0b9a0ab7907191523f5a0528e3b9c288e21 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:03 -0400
Subject: VFS: Remove redundant open-coded mode bit check in prepare_binfmt().

The check in prepare_binfmt() for inode->i_mode & 0111 is redundant,
since open_exec() will already have done that.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 822dec482ced07af32c378cd936d77345786572b commit)
---
 fs/exec.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 8344ba73a2a6..a6f64a98ac50 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -922,12 +922,6 @@ int prepare_binprm(struct linux_binprm *bprm)
 	int retval;
 
 	mode = inode->i_mode;
-	/*
-	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-	 * generic_permission lets a non-executable through
-	 */
-	if (!(mode & 0111))	/* with at least _one_ execute bit set */
-		return -EACCES;
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
-- 
cgit v1.2.3-59-g8ed1b


From a969fd5a4e162c4485ae8f3e49d674656a18fa36 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:04 -0400
Subject: VFS: Remove redundant open-coded mode bit checks in open_exec().

The check in open_exec() for inode->i_mode & 0111 has been made
redundant by the fix to permission().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 1d3741c5d991686699f100b65b9956f7ee7ae0ae commit)
---
 fs/exec.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index a6f64a98ac50..f7aabfeca033 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,8 +486,6 @@ struct file *open_exec(const char *name)
 		if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
 		    S_ISREG(inode->i_mode)) {
 			int err = vfs_permission(&nd, MAY_EXEC);
-			if (!err && !(inode->i_mode & 0111))
-				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
 				file = nameidata_to_filp(&nd, O_RDONLY);
-- 
cgit v1.2.3-59-g8ed1b